Author: siren
Date: Tue Nov 21 09:51:57 2006
New Revision: 477786
URL: http://svn.apache.org/viewvc?view=rev&rev=477786

Log:
NUTCH-362

Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/conf/parse-plugins.xml Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=477786&r1=477785&r2=477786 ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Tue Nov 21 09:51:57 2006 @@ -83,6 +83,9 @@ 27. NUTCH-405 - Content object is not properly initialized in map method of ParseSegment (siren) +28. NUTCH-362 - Remove parse-text from unsupported filetypes in + parse-plugins.xml (siren) + Release 0.8 - 2006-07-25 0. Totally new architecture, based on hadoop Modified: lucene/nutch/trunk/conf/parse-plugins.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/parse-plugins.xml?view=diff&rev=477786&r1=477785&r2=477786 ============================================================================== --- lucene/nutch/trunk/conf/parse-plugins.xml (original) +++ lucene/nutch/trunk/conf/parse-plugins.xml Tue Nov 21 09:51:57 2006 @@ -22,16 +22,6 @@ <parse-plugins> - <!-- by default if the mimeType is set to *, or - can't be determined, use parse-text --> - <mimeType name="*"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/java"> - <plugin id="parse-text" /> - </mimeType> - <mimeType name="application/msword"> <plugin id="parse-msword" /> </mimeType> @@ -46,7 +36,6 @@ <mimeType name="application/rss+xml"> <plugin id="parse-rss" /> - <plugin id="parse-text" /> </mimeType> <mimeType name="application/vnd.ms-excel"> @@ -113,20 +102,8 @@ <plugin id="parse-oo" /> </mimeType> - <mimeType name="application/vnd.wap.wbxml"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/vnd.wap.wmlc"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/vnd.wap.wmlscriptc"> - <plugin id="parse-text" /> - </mimeType> - <mimeType name="application/xhtml+xml"> - <plugin id="parse-text" /> + 
<plugin id="parse-html" /> </mimeType> <mimeType name="application/x-bzip2"> @@ -145,7 +122,6 @@ <mimeType name="application/x-javascript"> <plugin id="parse-js" /> - <plugin id="parse-text" /> </mimeType> <mimeType name="application/x-kword"> @@ -158,66 +134,14 @@ <plugin id="parse-msexcel" /> </mimeType> - <mimeType name="application/x-latex"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/x-netcdf"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/x-sh"> - <plugin id="parse-text" /> - </mimeType> - <mimeType name="application/x-shockwave-flash"> <plugin id="parse-swf" /> </mimeType> - <mimeType name="application/x-tcl"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/x-tex"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/x-texinfo"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/x-troff"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/x-troff-man"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/x-troff-me"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="application/x-troff-ms"> - <plugin id="parse-text" /> - </mimeType> - <mimeType name="application/zip"> <plugin id="parse-zip" /> </mimeType> - <mimeType name="message/news"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="message/rfc822"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="text/css"> - <plugin id="parse-text" /> - </mimeType> - <mimeType name="text/html"> <plugin id="parse-html" /> </mimeType> @@ -238,30 +162,15 @@ <mimeType name="text/sgml"> <plugin id="parse-html" /> - <plugin id="parse-text" /> </mimeType> <mimeType name="text/tab-separated-values"> <plugin id="parse-msexcel" /> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="text/vnd.wap.wml"> - <plugin id="parse-text" /> - </mimeType> - - <mimeType name="text/vnd.wap.wmlscript"> - <plugin 
id="parse-text" /> </mimeType> <mimeType name="text/xml"> - <plugin id="parse-text" /> <plugin id="parse-html" /> <plugin id="parse-rss" /> - </mimeType> - - <mimeType name="text/x-setext"> - <plugin id="parse-text" /> </mimeType> <!-- Types for parse-ext plugin: required for unit tests to pass. -->