Author: ab
Date: Tue Nov 28 13:02:10 2006
New Revision: 480207

URL: http://svn.apache.org/viewvc?view=rev&rev=480207

Log:
Use SpellCheckedMetadata only when necessary, i.e. only when collecting metadata from unreliable sources such as HTTP headers.
* Metadata: fix a bug where SpellCheckedMetadata would try to normalize metadata names during (de)serialization. * Content: should use regular Metadata by default, and when de-serializing. * fix HTTP protocol plugins to use SpellCheckedMetadata, where it's really necessary. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java?view=diff&rev=480207&r1=480206&r2=480207 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java Tue Nov 28 13:02:10 2006 @@ -92,6 +92,10 @@ * @return the values associated to a metadata name. 
*/ public String[] getValues(final String name) { + return _getValues(name); + } + + private String[] _getValues(final String name) { String[] values = metadata.get(name); if (values == null) { values = new String[0]; @@ -174,8 +178,8 @@ String[] names = names(); for (int i = 0; i < names.length; i++) { - String[] otherValues = other.getValues(names[i]); - String[] thisValues = getValues(names[i]); + String[] otherValues = other._getValues(names[i]); + String[] thisValues = _getValues(names[i]); if (otherValues.length != thisValues.length) { return false; } @@ -192,7 +196,7 @@ StringBuffer buf = new StringBuffer(); String[] names = names(); for (int i = 0; i < names.length; i++) { - String[] values = getValues(names[i]); + String[] values = _getValues(names[i]); for (int j = 0; j < values.length; j++) { buf.append(names[i]) .append("=") @@ -209,7 +213,7 @@ String[] names = names(); for (int i = 0; i < names.length; i++) { Text.writeString(out, names[i]); - values = getValues(names[i]); + values = _getValues(names[i]); int cnt = 0; for (int j = 0; j < values.length; j++) { if (values[j] != null) Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java?view=diff&rev=480207&r1=480206&r2=480207 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java Tue Nov 28 13:02:10 2006 @@ -31,7 +31,6 @@ import org.apache.hadoop.io.UTF8; import org.apache.hadoop.io.VersionMismatchException; import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.metadata.SpellCheckedMetadata; import org.apache.nutch.util.NutchConfiguration; import org.apache.nutch.util.mime.MimeType; import org.apache.nutch.util.mime.MimeTypeException; @@ -97,7 +96,7 @@ protected final void 
readFieldsCompressed(DataInput in) throws IOException { version = in.readByte(); - metadata = new SpellCheckedMetadata(); + metadata = new Metadata(); switch (version) { case 0: case 1: Modified: lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?view=diff&rev=480207&r1=480206&r2=480207 ============================================================================== --- lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java (original) +++ lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java Tue Nov 28 13:02:10 2006 @@ -31,6 +31,7 @@ // Nutch imports import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.metadata.Metadata; +import org.apache.nutch.metadata.SpellCheckedMetadata; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.protocol.ProtocolException; import org.apache.nutch.protocol.http.api.HttpBase; @@ -47,7 +48,7 @@ private String base; private byte[] content; private int code; - private Metadata headers = new Metadata(); + private Metadata headers = new SpellCheckedMetadata(); public HttpResponse(HttpBase http, URL url, CrawlDatum datum) Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java?view=diff&rev=480207&r1=480206&r2=480207 ============================================================================== --- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java (original) +++ 
lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java Tue Nov 28 13:02:10 2006 @@ -37,6 +37,7 @@ // Nutch imports import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.metadata.Metadata; +import org.apache.nutch.metadata.SpellCheckedMetadata; import org.apache.nutch.net.protocols.Response; import org.apache.nutch.protocol.http.api.HttpBase; import org.apache.nutch.util.LogUtil; @@ -61,7 +62,7 @@ private int code; - private Metadata headers = new Metadata(); + private Metadata headers = new SpellCheckedMetadata(); public HttpResponse(HttpBase http, URL url, CrawlDatum datum) throws IOException {