Author: snagel
Date: Thu Oct 9 19:20:51 2014
New Revision: 1630565
URL: http://svn.apache.org/r1630565
Log:
NUTCH-1164 JUnit tests for protocol-http
Added:
nutch/trunk/src/plugin/protocol-http/jsp/
nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp (with props)
nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp (with props)
nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp (with props)
nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp (with props)
nutch/trunk/src/plugin/protocol-http/src/test/conf/
nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
(with props)
nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
(with props)
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/build.xml
nutch/trunk/src/plugin/build.xml
nutch/trunk/src/plugin/protocol-http/build.xml
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1630565&r1=1630564&r2=1630565&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Oct 9 19:20:51 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1164 Write JUnit tests for protocol-http (nimafl via snagel)
+
* NUTCH-1868 Document and improve CLI for FileDumper tool (lewismc)
* NUTCH-1869 Add a flag to -mimeType fiag to FileDumper (lewismc)
@@ -10,7 +12,7 @@ Nutch Current Development 1.10-SNAPSHOT
* NUTCH-1826, NUTCH-1864 indexchecker fails if solr.server.url not configured
(lewismc, snagel)
-* NUTCH-1866 ant eclipse target should not delete runtime (nimafl vai lewismc)
+* NUTCH-1866 ant eclipse target should not delete runtime (nimafl via lewismc)
* NUTCH-1857 readb -dump -format csv should use comma (lewismc)
Modified: nutch/trunk/build.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1630565&r1=1630564&r2=1630565&view=diff
==============================================================================
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Thu Oct 9 19:20:51 2014
@@ -992,7 +992,7 @@
<source path="${plugins.dir}/protocol-httpclient/src/java/" />
<source path="${plugins.dir}/protocol-httpclient/src/test/" />
<source path="${plugins.dir}/protocol-http/src/java/" />
- <!-- <source path="${plugins.dir}/protocol-http/src/test/" /> -->
+ <source path="${plugins.dir}/protocol-http/src/test/" />
<source path="${plugins.dir}/scoring-depth/src/java/" />
<source path="${plugins.dir}/scoring-link/src/java/" />
<source path="${plugins.dir}/scoring-opic/src/java/" />
Modified: nutch/trunk/src/plugin/build.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/build.xml?rev=1630565&r1=1630564&r2=1630565&view=diff
==============================================================================
--- nutch/trunk/src/plugin/build.xml (original)
+++ nutch/trunk/src/plugin/build.xml Thu Oct 9 19:20:51 2014
@@ -88,6 +88,7 @@
<ant dir="language-identifier" target="test"/>
<ant dir="lib-http" target="test"/>
<ant dir="protocol-file" target="test"/>
+ <ant dir="protocol-http" target="test"/>
<ant dir="protocol-httpclient" target="test"/>
<!--ant dir="parse-ext" target="test"/-->
<ant dir="feed" target="test"/>
Modified: nutch/trunk/src/plugin/protocol-http/build.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/build.xml?rev=1630565&r1=1630564&r2=1630565&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/build.xml (original)
+++ nutch/trunk/src/plugin/protocol-http/build.xml Thu Oct 9 19:20:51 2014
@@ -29,12 +29,22 @@
<fileset dir="${nutch.root}/build">
<include name="**/lib-http/*.jar" />
</fileset>
+ <pathelement location="${build.dir}/test/conf"/>
</path>
<!-- Deploy Unit test dependencies -->
<target name="deps-test">
<ant target="deploy" inheritall="false" dir="../lib-http"/>
<ant target="deploy" inheritall="false" dir="../nutch-extensionpoints"/>
+ <copy toDir="${build.test}">
+ <fileset dir="${src.test}" excludes="**/*.java"/>
+ </copy>
</target>
+ <!-- for junit test -->
+ <mkdir dir="${build.test}/data" />
+ <copy todir="${build.test}/data">
+ <fileset dir="jsp"/>
+ </copy>
+
</project>
Added: nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp (added)
+++ nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp Thu Oct 9 19:20:51
2014
@@ -0,0 +1,44 @@
+<%--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--%><%--
+ Example JSP Page to Test Protocol-Http Plugin
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath =
request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <base href="<%=basePath%>">
+
+ <title>HelloWorld</title>
+ <meta http-equiv="content-type" content="text/html;charset=utf-8" />
+ <meta name="Language" content="en" />
+ <meta http-equiv="pragma" content="no-cache">
+ <meta http-equiv="cache-control" content="no-cache">
+ <meta http-equiv="expires" content="0">
+ <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+ <meta http-equiv="description" content="This is my page">
+ <!--
+ <link rel="stylesheet" type="text/css" href="styles.css">
+ -->
+ </head>
+
+ <body>
+ Hello World!!! <br>
+ </body>
+</html>
Propchange: nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp (added)
+++ nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp Thu Oct 9 19:20:51
2014
@@ -0,0 +1,47 @@
+<%--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--%><%--
+ Example JSP Page to Test Protocol-Http Plugin
+--%>
+
+@ page language="java" import="java.util.*" pageEncoding="UTF-8"
+
+String path = request.getContextPath();
+String basePath =
request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <base href="<%=basePath%>">
+
+ <title>HelloWorld</title>
+ <meta http-equiv="content-type" content="text/html;charset=utf-8" />
+ <meta name="Language" content="en" />
+ <meta http-equiv="pragma" content="no-cache">
+ <meta http-equiv="cache-control" content="no-cache">
+ <meta http-equiv="expires" content="0">
+ <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+ <meta http-equiv="description" content="This is my page">
+ <!--
+ <link rel="stylesheet" type="text/css" href="styles.css">
+ -->
+ </head>
+
+ <body>
+ Hello World!!! <br>
+ </body>
+</html>
Propchange: nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp (added)
+++ nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp Thu Oct 9
19:20:51 2014
@@ -0,0 +1,49 @@
+<%--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--%><%--
+ Example JSP Page to Test Protocol-Http Plugin
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath =
request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <base href="<%=basePath%>">
+
+ <title>My JSP page</title>
+
+ <meta http-equiv="pragma" content="no-cache">
+ <meta http-equiv="cache-control" content="no-cache">
+ <meta http-equiv="expires" content="0">
+ <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+ <meta http-equiv="description" content="This is my page">
+ <!--
+ <link rel="stylesheet" type="text/css" href="styles.css">
+ -->
+
+ </head>
+
+ <body>
+ <%
+ response.setStatus(301);
+ response.setHeader( "Location", "http://nutch.apache.org");
+ response.setHeader( "Connection", "close" );
+ %>
+ You are redirected by JSP<br>
+ </body>
+</html>
Propchange: nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp (added)
+++ nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp Thu Oct 9
19:20:51 2014
@@ -0,0 +1,49 @@
+<%--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--%><%--
+ Example JSP Page to Test Protocol-Http Plugin
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath =
request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <base href="<%=basePath%>">
+
+ <title>My JSP page</title>
+
+ <meta http-equiv="pragma" content="no-cache">
+ <meta http-equiv="cache-control" content="no-cache">
+ <meta http-equiv="expires" content="0">
+ <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+ <meta http-equiv="description" content="This is my page">
+ <!--
+ <link rel="stylesheet" type="text/css" href="styles.css">
+ -->
+
+ </head>
+
+ <body>
+ <%
+ response.setStatus(302);
+ response.setHeader( "Location", "http://nutch.apache.org");
+ response.setHeader( "Connection", "close" );
+ %>
+ You are sucessfully redirected by JSP<br>
+ </body>
+</html>
Propchange: nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp
------------------------------------------------------------------------------
svn:eol-style = native
Added: nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
(added)
+++ nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml Thu
Oct 9 19:20:51 2014
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<configuration>
+<!--
+  Site overrides for the protocol-http unit test: TestProtocolHttp adds this
+  file as a Configuration resource right after nutch-default.xml.
+  NOTE(review): the http.agent.description text and the http.auth.file value
+  both mention "httpclient"; presumably they were carried over from the
+  protocol-httpclient test configuration. Confirm protocol-http needs them.
+-->
+
+<property>
+ <name>http.robots.agents</name>
+ <value>Nutch-Test,*</value>
+ <description></description>
+</property>
+
+<property>
+ <name>http.agent.name</name>
+ <value>Nutch-Test</value>
+ <description></description>
+</property>
+
+<property>
+ <name>http.agent.description</name>
+ <value>Nutch protocol-httpclient test</value>
+ <description></description>
+</property>
+
+<property>
+ <name>http.auth.file</name>
+ <value>httpclient-auth-test.xml</value>
+ <description></description>
+</property>
+
+<property>
+ <name>http.timeout</name>
+ <value>60000</value>
+ <description></description>
+</property>
+
+</configuration>
\ No newline at end of file
Propchange:
nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added:
nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1630565&view=auto
==============================================================================
---
nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
(added)
+++
nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
Thu Oct 9 19:20:51 2014
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.http;
+
+import static org.junit.Assert.assertEquals;
+
+import java.net.URL;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.protocol.ProtocolOutput;
+import org.junit.After;
+import org.junit.Test;
+import org.mortbay.jetty.Server;
+import org.mortbay.jetty.nio.SelectChannelConnector;
+import org.mortbay.jetty.servlet.Context;
+import org.mortbay.jetty.servlet.ServletHolder;
+
+/**
+ * Test cases for protocol-http.
+ *
+ * <p>
+ * Each test starts a local Jetty server that serves the JSP test pages found
+ * in the test data directory, fetches pages through the {@link Http} protocol
+ * plugin and checks the HTTP status code reported for them. For every page
+ * except the missing page, the broken page and the Jetty-level redirect, the
+ * content type of the fetched content is checked as well.
+ */
+public class TestProtocolHttp {
+  /**
+   * Directory used as Jetty resource base: value of the system property
+   * <code>test.data</code>, or the current directory if it is not set.
+   */
+  private static final String RES_DIR = System.getProperty("test.data", ".");
+
+  private Http http;
+  private Server server;
+  private Context root;
+  private Configuration conf;
+  private int port;
+
+  /**
+   * Loads the test configuration, creates the {@link Http} plugin instance
+   * and prepares (but does not start) a Jetty server whose context handles
+   * <code>*.jsp</code> with the Jasper JSP servlet.
+   *
+   * <p>
+   * Carries no JUnit annotation; it is called explicitly from
+   * {@link #startServer(int, boolean)}.
+   *
+   * @param redirection
+   *          if <code>true</code> the context is mounted at
+   *          <code>/redirection</code> instead of <code>/</code>
+   */
+  public void setUp(boolean redirection) throws Exception {
+    conf = new Configuration();
+    conf.addResource("nutch-default.xml");
+    conf.addResource("nutch-site-test.xml");
+
+    http = new Http();
+    http.setConf(conf);
+
+    server = new Server();
+
+    if (redirection) {
+      root = new Context(server, "/redirection", Context.SESSIONS);
+      // NOTE(review): purpose of this attribute is not visible here; confirm
+      // whether Jetty or any servlet actually reads "newContextURL".
+      root.setAttribute("newContextURL", "/redirect");
+    } else {
+      root = new Context(server, "/", Context.SESSIONS);
+    }
+
+    ServletHolder sh = new ServletHolder(
+        org.apache.jasper.servlet.JspServlet.class);
+    root.addServlet(sh, "*.jsp");
+    root.setResourceBase(RES_DIR);
+  }
+
+  /**
+   * Stops the Jetty server after each test, if one was created.
+   */
+  @After
+  public void tearDown() throws Exception {
+    // server is still null when setUp failed before creating it; stopping
+    // unconditionally would pile a NullPointerException on the real failure.
+    if (server != null) {
+      server.stop();
+    }
+  }
+
+  /**
+   * Checks the status codes returned for the JSP test pages, a missing page
+   * and a JSP page that fails on the server side.
+   */
+  @Test
+  public void testStatusCode() throws Exception {
+    startServer(47504, false);
+    fetchPage("/basic-http.jsp", 200);
+    fetchPage("/redirect301.jsp", 301);
+    fetchPage("/redirect302.jsp", 302);
+    fetchPage("/nonexists.html", 404);
+    fetchPage("/brokenpage.jsp", 500);
+  }
+
+  /**
+   * Checks that requesting the context path of a context mounted at
+   * <code>/redirection</code> is answered with status 302.
+   */
+  @Test
+  public void testRedirectionJetty() throws Exception {
+    // Redirection via Jetty
+    // (presumably to the context path with trailing slash; TODO confirm)
+    startServer(47503, true);
+    fetchPage("/redirection", 302);
+  }
+
+  /**
+   * Prepares the test fixtures via {@link #setUp(boolean)} and starts the
+   * Jetty server on 127.0.0.1 at the given port.
+   *
+   * @param portno
+   *          Port number the server listens on; also used to build the URLs
+   *          in {@link #fetchPage(String, int)}.
+   * @param redirection
+   *          passed on to {@link #setUp(boolean)}: whether the context is
+   *          mounted at <code>/redirection</code> instead of <code>/</code>
+   */
+  private void startServer(int portno, boolean redirection) throws Exception {
+    port = portno;
+    setUp(redirection);
+    SelectChannelConnector connector = new SelectChannelConnector();
+    connector.setHost("127.0.0.1");
+    connector.setPort(port);
+
+    server.addConnector(connector);
+    server.start();
+  }
+
+  /**
+   * Fetches the specified <code>page</code> from the local Jetty server and
+   * checks whether the HTTP response status code matches the expected code.
+   * For pages accepted by {@link #hasCheckedContentType(String)} the content
+   * type of the fetched content is verified, too.
+   *
+   * @param page
+   *          Page to be fetched (path on the local server).
+   * @param expectedCode
+   *          HTTP response status code expected while fetching the page.
+   */
+  private void fetchPage(String page, int expectedCode) throws Exception {
+    URL url = new URL("http", "127.0.0.1", port, page);
+    CrawlDatum crawlDatum = new CrawlDatum();
+    // the page is requested twice: once for the raw response (status code)
+    // and once through the protocol output API (content)
+    Response response = http.getResponse(url, crawlDatum, true);
+    ProtocolOutput out = http.getProtocolOutput(new Text(url.toString()),
+        crawlDatum);
+    Content content = out.getContent();
+    assertEquals("HTTP Status Code for " + url, expectedCode,
+        response.getCode());
+
+    if (hasCheckedContentType(page)) {
+      // NOTE(review): the JSP pages declare text/html; presumably the
+      // content type is re-detected from the XHTML namespace in the page
+      // body - confirm.
+      assertEquals("ContentType " + url, "application/xhtml+xml",
+          content.getContentType());
+    }
+  }
+
+  /**
+   * Tells whether the content type is verified for <code>page</code>: it is
+   * skipped for the missing page, the broken JSP page and the Jetty-level
+   * redirect.
+   */
+  private static boolean hasCheckedContentType(String page) {
+    return !page.equals("/nonexists.html")
+        && !page.equals("/brokenpage.jsp")
+        && !page.equals("/redirection");
+  }
+}
+
Propchange:
nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
------------------------------------------------------------------------------
svn:eol-style = native