vgritsenko 2002/08/16 21:10:12 Modified: src/java/org/apache/cocoon/components/crawler Tag: cocoon_2_0_3_branch SimpleCocoonCrawlerImpl.java Log: sync with head (fix NPE, close reader) Revision Changes Path No revision No revision 1.9.2.2 +35 -20 xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java Index: SimpleCocoonCrawlerImpl.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java,v retrieving revision 1.9.2.1 retrieving revision 1.9.2.2 diff -u -r1.9.2.1 -r1.9.2.2 --- SimpleCocoonCrawlerImpl.java 7 Aug 2002 10:52:44 -0000 1.9.2.1 +++ SimpleCocoonCrawlerImpl.java 17 Aug 2002 04:10:12 -0000 1.9.2.2 @@ -85,8 +85,7 @@ * @version CVS $Id$ */ public class SimpleCocoonCrawlerImpl extends AbstractLoggable - implements CocoonCrawler, Configurable, Disposable, Recyclable -{ + implements CocoonCrawler, Configurable, Disposable, Recyclable { /** * Config element name specifying expected link content-typ. @@ -162,7 +161,7 @@ /** * Default value of <code>user-agent</code> configuration value. 
* @see Constants#COMPLETE_NAME - * + * * @since */ public final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME; @@ -234,7 +233,7 @@ * @since */ public void configure(Configuration configuration) - throws ConfigurationException { + throws ConfigurationException { Configuration[] children; children = configuration.getChildren(INCLUDE_CONFIG); @@ -250,7 +249,7 @@ } } catch (RESyntaxException rese) { getLogger().error("Cannot create including regular-expression for " + - pattern, rese); + pattern, rese); } } } else { @@ -272,7 +271,7 @@ } } catch (RESyntaxException rese) { getLogger().error("Cannot create excluding regular-expression for " + - pattern, rese); + pattern, rese); } } } else { @@ -414,12 +413,12 @@ */ private void setDefaultExcludeFromCrawling() { String[] EXCLUDE_FROM_CRAWLING_DEFAULT = { - ".*\\.gif(\\?.*)?$", - ".*\\.png(\\?.*)?$", - ".*\\.jpe?g(\\?.*)?$", - ".*\\.js(\\?.*)?$", - ".*\\.css(\\?.*)?$" - }; + ".*\\.gif(\\?.*)?$", + ".*\\.png(\\?.*)?$", + ".*\\.jpe?g(\\?.*)?$", + ".*\\.js(\\?.*)?$", + ".*\\.css(\\?.*)?$" + }; for (int i = 0; i < EXCLUDE_FROM_CRAWLING_DEFAULT.length; i++) { String pattern = EXCLUDE_FROM_CRAWLING_DEFAULT[i]; @@ -464,18 +463,27 @@ if (getLogger().isDebugEnabled()) { getLogger().debug("Getting links of URL " + sURL); } + BufferedReader br = null; try { sURL = url.getFile(); URL links = new URL(url, sURL - + ((sURL.indexOf("?") == -1) ? "?" : "&") - + linkViewQuery); + + ((sURL.indexOf("?") == -1) ? "?" 
: "&") + + linkViewQuery); URLConnection links_url_connection = links.openConnection(); InputStream is = links_url_connection.getInputStream(); - BufferedReader br = new BufferedReader(new InputStreamReader(is)); + br = new BufferedReader(new InputStreamReader(is)); String contentType = links_url_connection.getContentType(); + if (contentType == null) { + if (getLogger().isDebugEnabled()) { + getLogger().debug("Ignoring " + sURL + " (no content type)"); + } + // there is a check on null in the calling method + return null; + } + int index = contentType.indexOf(';'); - if (contentType != null && index != -1) { + if (index != -1) { contentType = contentType.substring(0, index); } if (getLogger().isDebugEnabled()) { @@ -521,6 +529,14 @@ } } catch (IOException ioe) { getLogger().warn("Problems get links of " + url, ioe); + } finally { + if (br != null) { + try { + br.close(); + br = null; + } catch (IOException ignored) { + } + } } return url_links; } @@ -598,8 +614,7 @@ * @author <a href="mailto:[EMAIL PROTECTED]>Bernhard Huber</a> * @version $Id$ */ - public static class CocoonCrawlerIterator implements Iterator - { + public static class CocoonCrawlerIterator implements Iterator { private SimpleCocoonCrawlerImpl cocoonCrawler;
---------------------------------------------------------------------- In case of trouble, e-mail: [EMAIL PROTECTED] To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]