Author: siren
Date: Tue Oct 24 07:28:46 2006
New Revision: 467345

URL: http://svn.apache.org/viewvc?view=rev&rev=467345
Log:
fix for NUTCH-391

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=467345&r1=467344&r2=467345
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Oct 24 07:28:46 2006
@@ -50,6 +50,9 @@
 
 17. NUTCH-383 - upgrade to Hadoop 0.7.1 and Lucene 2.0.0. (ab)
 
+18. NUTCH-391 - ParseUtil logs file contents to log file when it cannot
+    find parser (siren)
+
 ****************************** WARNING !!! ********************************
 * This upgrade breaks data format compatibility. A tool 'convertdb'       *
 * was added to migrate existing CrawlDb-s to the new format. Segment data *

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java?view=diff&rev=467345&r1=467344&r2=467345
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java Tue Oct 24 07:28:46 2006
@@ -68,8 +68,8 @@
       parsers = this.parserFactory.getParsers(content.getContentType(), "");
     } catch (ParserNotFound e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("No suitable parser found when trying to parse content " +
-                 content);
+        LOG.warn("No suitable parser found when trying to parse content " + content.getUrl() +
+               " of type " + content.getContentType());
       }
       throw new ParseException(e.getMessage());
     }
@@ -123,8 +123,8 @@
       p = this.parserFactory.getParserById(extId);
     } catch (ParserNotFound e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("No suitable parser found when trying to parse content " +
-                 content);
+        LOG.warn("No suitable parser found when trying to parse content " + content.getUrl() +
+               " of type " + content.getContentType());
       }
       throw new ParseException(e.getMessage());
     }