Author: mattmann Date: Fri Mar 9 22:52:31 2007 New Revision: 516660 URL: http://svn.apache.org/viewvc?view=rev&rev=516660 Log: fix for NUTCH-384 (contributed by Heiko Dietze)
Added: lucene/nutch/trunk/src/plugin/protocol-file/src/test/ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=516660&r1=516659&r2=516660 ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Fri Mar 9 22:52:31 2007 @@ -155,6 +155,9 @@ 52. NUTCH-167 - Observation of robots "noarchive" directive. (ab) +53. 
NUTCH-384 - Protocol-file plugin does not allow the parse plugins + framework to operate properly (Heiko Dietze via mattmann) + Release 0.8 - 2006-07-25 Modified: lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java?view=diff&rev=516660&r1=516659&r2=516660 ============================================================================== --- lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java (original) +++ lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java Fri Mar 9 22:52:31 2007 @@ -34,6 +34,7 @@ import org.apache.nutch.protocol.ProtocolOutput; import org.apache.nutch.protocol.ProtocolStatus; import org.apache.nutch.protocol.RobotRules; +import org.apache.nutch.util.NutchConfiguration; import java.net.URL; @@ -100,6 +101,7 @@ } } } catch (Exception e) { + e.printStackTrace(); return new ProtocolOutput(null, new ProtocolStatus(e)); } } @@ -137,6 +139,7 @@ } File file = new File(); + file.setConf(NutchConfiguration.create()); if (maxContentLength != Integer.MIN_VALUE) // set maxContentLength file.setMaxContentLength(maxContentLength); Modified: lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java?view=diff&rev=516660&r1=516659&r2=516660 ============================================================================== --- lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java (original) +++ lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java Fri Mar 9 22:52:31 2007 @@ -26,6 +26,8 @@ // Nutch imports import org.apache.nutch.crawl.CrawlDatum; import 
org.apache.nutch.protocol.Content; +import org.apache.nutch.util.mime.MimeType; +import org.apache.nutch.util.mime.MimeTypes; import org.apache.nutch.metadata.Metadata; import org.apache.nutch.net.protocols.Response; @@ -191,9 +193,12 @@ // set headers headers.set(Response.CONTENT_LENGTH, new Long(size).toString()); - headers.set(Response.LAST_MODIFIED, - this.file.httpDateFormat.toString(f.lastModified())); - headers.set(Response.CONTENT_TYPE, ""); // No Content-Type at file protocol level + headers.set(Response.LAST_MODIFIED, this.file.httpDateFormat.toString(f + .lastModified())); + MimeTypes mimeTypes = MimeTypes.get(conf.get("mime.types.file")); + MimeType mimeType = mimeTypes.getMimeType(f); + String mimeTypeString = mimeType != null ? mimeType.getName() : ""; + headers.set(Response.CONTENT_TYPE, mimeTypeString); // response code this.code = 200; // http OK Added: lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java?view=auto&rev=516660 ============================================================================== --- lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java (added) +++ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java Fri Mar 9 22:52:31 2007 @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nutch.protocol.file; + +// Hadoop imports +import org.apache.hadoop.io.Text; + +// Nutch imports +import org.apache.nutch.crawl.CrawlDatum; +import org.apache.nutch.net.protocols.Response; +import org.apache.nutch.protocol.ProtocolOutput; +import org.apache.nutch.protocol.ProtocolStatus; +import org.apache.nutch.util.NutchConfiguration; + +// Junit imports +import junit.framework.TestCase; + +/** + * @author mattmann + * @version $Revision$ + * + * <p> + * Unit tests for the {@link File}Protocol. + * </p>. + */ +public class TestProtocolFile extends TestCase { + + private static final org.apache.nutch.protocol.file.File fileProtocol = + new org.apache.nutch.protocol.file.File(); + + private static final String testTextFile = "testprotocolfile.txt"; + + private static final CrawlDatum datum = new CrawlDatum(); + + private static final String expectedMimeType = "text/plain"; + + static { + fileProtocol.setConf(NutchConfiguration.create()); + } + + /** + * Tests the setting of the <code>Response.CONTENT_TYPE</code> metadata + * field. 
+ * + * @since NUTCH-384 + * + */ + public void testSetContentType() { + Text fileUrl = new Text(this.getClass().getResource(testTextFile) + .toString()); + assertNotNull(fileUrl); + ProtocolOutput output = fileProtocol.getProtocolOutput(fileUrl, datum); + assertNotNull(output); + assertEquals("Status code: [" + output.getStatus().getCode() + + "], not equal to: [" + ProtocolStatus.SUCCESS + "]: args: [" + + output.getStatus().getArgs() + "]", ProtocolStatus.SUCCESS, output + .getStatus().getCode()); + assertNotNull(output.getContent()); + assertNotNull(output.getContent().getContentType()); + assertEquals(expectedMimeType, output.getContent().getContentType()); + assertNotNull(output.getContent().getMetadata()); + assertEquals(expectedMimeType, output.getContent().getMetadata().get( + Response.CONTENT_TYPE)); + + } + +} Added: lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt?view=auto&rev=516660 ============================================================================== --- lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt (added) +++ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt Fri Mar 9 22:52:31 2007 @@ -0,0 +1 @@ +Protocol File Test ------------------------------------------------------------------------- Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys-and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs