Bugs item #1045658, was opened at 2004-10-12 12:52
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=491356&aid=1045658&group_id=59548

Category: plugin: other
Group: None
Status: Open
Resolution: None
Priority: 5
Submitted By: roger_clermont (rclermont)
Assigned to: Nobody/Anonymous (nobody)
Summary: https plugin contribution

Initial Comment:
Attached is an https protocol plugin, and included here
is a cvs diff with the corresponding changes to
incorporate it into the project.

The plugin is basically a copy of the http protocol,
but with added protocol handlers to establish a secure
connection.





? src/plugin/protocol-https
Index: build.xml
===================================================================
RCS file: /cvsroot/nutch/nutch/build.xml,v
retrieving revision 1.68
diff -u -r1.68 build.xml
--- build.xml   3 Oct 2004 22:23:30 -0000       1.68
+++ build.xml   12 Oct 2004 19:46:44 -0000
@@ -195,6 +195,7 @@
        <packageset dir="${src.dir}"/>
        <packageset
dir="${plugins.dir}/protocol-file/src/java"/>
        <packageset
dir="${plugins.dir}/protocol-ftp/src/java"/>
+       <packageset
dir="${plugins.dir}/protocol-https/src/java"/>
                <packageset
dir="${plugins.dir}/protocol-http/src/java"/>
        <packageset
dir="${plugins.dir}/parse-html/src/java"/>
        <packageset
dir="${plugins.dir}/parse-text/src/java"/>
Index: default.properties
===================================================================
RCS file: /cvsroot/nutch/nutch/default.properties,v
retrieving revision 1.29
diff -u -r1.29 default.properties
--- default.properties  3 Oct 2004 22:23:30 -0000       1.29
+++ default.properties  12 Oct 2004 19:46:44 -0000
@@ -39,6 +39,7 @@
 optimize=on
 deprecation=on
 
+plugin.https=net.nutch.protocol.https*
 plugin.http=net.nutch.protocol.http*
 plugin.ftp=net.nutch.protocol.ftp*
 plugin.file=net.nutch.protocol.file*
@@ -52,6 +53,6 @@
 plugin.more=net.nutch.indexer.more*
 plugin.language=net.nutch.analysis.lang*
 plugin.creative=org.creativecommons.nutch*
-plugins.packages=${plugin.http}:${plugin.ftp}:${plugin.file}:${plugin.html}:${plugin.mp3}:
+plugins.packages=${plugin.https}:${plugin.http}:${plugin.ftp}:${plugin.file}:${plugin.html}:${plugin.mp3}:
 
${plugin.msword}:${plugin.rtf}:${plugin.pdf}:${plugin.text}:${plugin.basic}:${plugin.more}:
     ${plugin.language}:${plugin.creative}
Index: conf/crawl-urlfilter.txt.template
===================================================================
RCS file:
/cvsroot/nutch/nutch/conf/crawl-urlfilter.txt.template,v
retrieving revision 1.3
diff -u -r1.3 crawl-urlfilter.txt.template
--- conf/crawl-urlfilter.txt.template   16 Jun 2004
17:31:30 -0000  1.3
+++ conf/crawl-urlfilter.txt.template   12 Oct 2004
19:46:44 -0000
@@ -18,7 +18,7 @@
 [EMAIL PROTECTED]
 
 # accept hosts in MY.DOMAIN.NAME
-+^http://([a-z0-9]*\.)*MY.DOMAIN.NAME/
++^https?://([a-z0-9]*\.)*MY.DOMAIN.NAME/
 
 # skip everything else
 -.
Index: src/plugin/build.xml
===================================================================
RCS file: /cvsroot/nutch/nutch/src/plugin/build.xml,v
retrieving revision 1.20
diff -u -r1.20 build.xml
--- src/plugin/build.xml        3 Oct 2004 22:23:30 -0000       1.20
+++ src/plugin/build.xml        12 Oct 2004 19:46:46 -0000
@@ -8,6 +8,7 @@
   <target name="deploy">
     <ant dir="protocol-file" target="deploy"/>
     <ant dir="protocol-ftp" target="deploy"/>
+    <ant dir="protocol-https" target="deploy"/>
     <ant dir="protocol-http" target="deploy"/>
     <ant dir="parse-html" target="deploy"/>
     <ant dir="parse-text" target="deploy"/>
@@ -30,6 +31,7 @@
   <!-- Test all of the plugins.                      
        -->
   <!--
====================================================== -->
   <target name="test">
+    <ant dir="protocol-https" target="test"/>
     <ant dir="protocol-http" target="test"/>
     <ant dir="parse-html" target="test"/>
     <ant dir="parse-pdf" target="test"/>
@@ -47,6 +49,7 @@
   <target name="clean">
     <ant dir="protocol-file" target="clean"/>
     <ant dir="protocol-ftp" target="clean"/>
+    <ant dir="protocol-https" target="clean"/>
     <ant dir="protocol-http" target="clean"/>
     <ant dir="parse-html" target="clean"/>
     <ant dir="parse-text" target="clean"/>


----------------------------------------------------------------------

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=491356&aid=1045658&group_id=59548


-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
_______________________________________________
Nutch-developers mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-developers

Reply via email to