Author: jnioche
Date: Wed Apr 16 14:56:09 2014
New Revision: 1587935

URL: http://svn.apache.org/r1587935
Log:
NUTCH-1743
Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1587935&r1=1587934&r2=1587935&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Apr 16 14:56:09 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1743 parsechecker to show outlinks (jnioche, snagel)
+
 * NUTCH-1603 ZIP parser complains about truncated PDF file (snagel)
 
 * NUTCH-1720 Duplicate lines in HttpBase.java (Walter Tietze via jnioche)

Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java?rev=1587935&r1=1587934&r2=1587935&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java Wed Apr 16 14:56:09 2014
@@ -151,6 +151,12 @@ public class ParserChecker implements To
       System.out.print(entry.getKey());
       LOG.info("\n---------\nParseData\n---------\n");
       System.out.print(parse.getData().toString());
+      LOG.info("---------\nOutlinks\n---------\n");
+      StringBuffer sb = new StringBuffer();
+      for (Outlink l : parse.getData().getOutlinks()) {
+        sb.append(" outlink: ").append(l).append('\n');
+      }
+      System.out.print(sb.toString());
       if (dumpText) {
         LOG.info("---------\nParseText\n---------\n");
         System.out.print(parse.getText());