Author: ab Date: Wed May 9 11:54:06 2007 New Revision: 536618 URL: http://svn.apache.org/viewvc?view=rev&rev=536618 Log: Add missing file from the last commit. Spotted by Sami.
Added: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java (with props) Added: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java?view=auto&rev=536618 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java (added) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java Wed May 9 11:54:06 2007 @@ -0,0 +1,71 @@ +package org.apache.nutch.parse; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.Text; + +/** + * A utility class that stores result of a parse. Internally + * a ParseResult stores <[EMAIL PROTECTED] Text}, [EMAIL PROTECTED] Parse}> pairs. + */ +public class ParseResult implements Iterable<Map.Entry<Text, Parse>> { + private Map<Text, Parse> parseMap; + private String originalUrl; + + public static final Log LOG = LogFactory.getLog(ParseResult.class); + + public ParseResult(String originalUrl) { + parseMap = new HashMap<Text, Parse>(); + this.originalUrl = originalUrl; + } + + public static ParseResult createParseResult(String url, Parse parse) { + ParseResult parseResult = new ParseResult(url); + parseResult.put(new Text(url), new ParseText(parse.getText()), parse.getData()); + return parseResult; + } + + public boolean isEmpty() { + return parseMap.isEmpty(); + } + + public int size() { + return parseMap.size(); + } + + public Parse get(String key) { + return get(new Text(key)); + } + + public Parse get(Text key) { + return parseMap.get(key); + } + + public void put(Text key, ParseText text, ParseData data) { + put(key.toString(), text, data); + } + + public void put(String key, ParseText text, ParseData data) { + parseMap.put(new Text(key), new ParseImpl(text, data, key.equals(originalUrl))); + } + + public Iterator<Entry<Text, Parse>> iterator() { + return parseMap.entrySet().iterator(); + } + + public void filter() { + for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) { + Entry<Text, Parse> entry = i.next(); + if (!entry.getValue().getData().getStatus().isSuccess()) { + LOG.warn(entry.getKey() + " is not parsed successfully, filtering"); + i.remove(); + } + } + + } +} Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java ------------------------------------------------------------------------------ svn:eol-style = native ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs