Author: ab Date: Wed Feb 14 04:15:05 2007 New Revision: 507504 URL: http://svn.apache.org/viewvc?view=rev&rev=507504 Log: Outlink: when a null anchor is supplied, replace it with an empty string.
ParseSegment: store segment name in parts that we produce here. Content is only read, not stored as one of the outputs. Failure to do that results in NPE in Indexer. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java?view=diff&rev=507504&r1=507503&r2=507504 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java Wed Feb 14 04:15:05 2007 @@ -34,6 +34,7 @@ public Outlink(String toUrl, String anchor, Configuration conf) throws MalformedURLException { this.toUrl = new URLNormalizers(conf, URLNormalizers.SCOPE_OUTLINK).normalize(toUrl, URLNormalizers.SCOPE_OUTLINK); + if (anchor == null) anchor = ""; this.anchor = anchor; } Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?view=diff&rev=507504&r1=507503&r2=507504 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java Wed Feb 14 04:15:05 2007 @@ -81,7 +81,10 @@ // compute the new signature byte[] signature = SignatureFactory.getSignature(getConf()).calculate(content, parse); - content.getMetadata().set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature)); + if (parse != null) { + parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature)); + parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY, getConf().get(Nutch.SEGMENT_NAME_KEY)); + } if 
(status.isSuccess()) { try { @@ -95,7 +98,7 @@ } output.collect(key, new ParseImpl(parse.getText(), parse.getData())); } else if (LOG.isWarnEnabled()) { - LOG.warn("Error parsing: "+key+": "+status.toString()); + LOG.warn("Error parsing: " + key + ": "+status.toString()); } } @@ -116,9 +119,8 @@ job.setJobName("parse " + segment); job.setInputPath(new Path(segment, Content.DIR_NAME)); + job.set(Nutch.SEGMENT_NAME_KEY, segment.getName()); job.setInputFormat(SequenceFileInputFormat.class); - job.setInputKeyClass(Text.class); - job.setInputValueClass(Content.class); job.setMapperClass(ParseSegment.class); job.setReducerClass(ParseSegment.class); ------------------------------------------------------------------------- Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys-and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs