Bugs item #1045658, was opened at 2004-10-12 12:52
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=491356&aid=1045658&group_id=59548
Category: plugin: other
Group: None
Status: Open
Resolution: None
Priority: 5
Submitted By: roger_clermont (rclermont)
Assigned to: Nobody/Anonymous (nobody)
Summary: https plugin contribution
Initial Comment:
Attached is an https protocol plugin, and included here
is a cvs diff with the corresponding changes to
incorporate it into the project.
The plugin is basically a copy of the http protocol,
but with added protocol handlers to establish a secure
connection.
? src/plugin/protocol-https
Index: build.xml
===================================================================
RCS file: /cvsroot/nutch/nutch/build.xml,v
retrieving revision 1.68
diff -u -r1.68 build.xml
--- build.xml 3 Oct 2004 22:23:30 -0000 1.68
+++ build.xml 12 Oct 2004 19:46:44 -0000
@@ -195,6 +195,7 @@
<packageset dir="${src.dir}"/>
<packageset
dir="${plugins.dir}/protocol-file/src/java"/>
<packageset
dir="${plugins.dir}/protocol-ftp/src/java"/>
+ <packageset
dir="${plugins.dir}/protocol-https/src/java"/>
<packageset
dir="${plugins.dir}/protocol-http/src/java"/>
<packageset
dir="${plugins.dir}/parse-html/src/java"/>
<packageset
dir="${plugins.dir}/parse-text/src/java"/>
Index: default.properties
===================================================================
RCS file: /cvsroot/nutch/nutch/default.properties,v
retrieving revision 1.29
diff -u -r1.29 default.properties
--- default.properties 3 Oct 2004 22:23:30 -0000 1.29
+++ default.properties 12 Oct 2004 19:46:44 -0000
@@ -39,6 +39,7 @@
optimize=on
deprecation=on
+plugin.https=net.nutch.protocol.https*
plugin.http=net.nutch.protocol.http*
plugin.ftp=net.nutch.protocol.ftp*
plugin.file=net.nutch.protocol.file*
@@ -52,6 +53,6 @@
plugin.more=net.nutch.indexer.more*
plugin.language=net.nutch.analysis.lang*
plugin.creative=org.creativecommons.nutch*
-plugins.packages=${plugin.http}:${plugin.ftp}:${plugin.file}:${plugin.html}:${plugin.mp3}:
+plugins.packages=${plugin.https}:${plugin.http}:${plugin.ftp}:${plugin.file}:${plugin.html}:${plugin.mp3}:
${plugin.msword}:${plugin.rtf}:${plugin.pdf}:${plugin.text}:${plugin.basic}:${plugin.more}:
${plugin.language}:${plugin.creative}
Index: conf/crawl-urlfilter.txt.template
===================================================================
RCS file: /cvsroot/nutch/nutch/conf/crawl-urlfilter.txt.template,v
retrieving revision 1.3
diff -u -r1.3 crawl-urlfilter.txt.template
--- conf/crawl-urlfilter.txt.template 16 Jun 2004 17:31:30 -0000 1.3
+++ conf/crawl-urlfilter.txt.template 12 Oct 2004 19:46:44 -0000
@@ -18,7 +18,7 @@
[EMAIL PROTECTED]
# accept hosts in MY.DOMAIN.NAME
-+^http://([a-z0-9]*\.)*MY.DOMAIN.NAME/
++^https?://([a-z0-9]*\.)*MY.DOMAIN.NAME/
# skip everything else
-.
Index: src/plugin/build.xml
===================================================================
RCS file: /cvsroot/nutch/nutch/src/plugin/build.xml,v
retrieving revision 1.20
diff -u -r1.20 build.xml
--- src/plugin/build.xml 3 Oct 2004 22:23:30 -0000 1.20
+++ src/plugin/build.xml 12 Oct 2004 19:46:46 -0000
@@ -8,6 +8,7 @@
<target name="deploy">
<ant dir="protocol-file" target="deploy"/>
<ant dir="protocol-ftp" target="deploy"/>
+ <ant dir="protocol-https" target="deploy"/>
<ant dir="protocol-http" target="deploy"/>
<ant dir="parse-html" target="deploy"/>
<ant dir="parse-text" target="deploy"/>
@@ -30,6 +31,7 @@
<!-- Test all of the plugins.
-->
<!--
====================================================== -->
<target name="test">
+ <ant dir="protocol-https" target="test"/>
<ant dir="protocol-http" target="test"/>
<ant dir="parse-html" target="test"/>
<ant dir="parse-pdf" target="test"/>
@@ -47,6 +49,7 @@
<target name="clean">
<ant dir="protocol-file" target="clean"/>
<ant dir="protocol-ftp" target="clean"/>
+ <ant dir="protocol-https" target="clean"/>
<ant dir="protocol-http" target="clean"/>
<ant dir="parse-html" target="clean"/>
<ant dir="parse-text" target="clean"/>
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=491356&aid=1045658&group_id=59548
-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
_______________________________________________
Nutch-developers mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-developers