Author: pkosiorowski
Date: Mon Aug  8 12:48:17 2005
New Revision: 230867

URL: http://svn.apache.org/viewcvs?rev=230867&view=rev
Log:
Skipping png and pdf files.

Modified:
    lucene/nutch/trunk/conf/crawl-urlfilter.txt.template

Modified: lucene/nutch/trunk/conf/crawl-urlfilter.txt.template
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/crawl-urlfilter.txt.template?rev=230867&r1=230866&r2=230867&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/crawl-urlfilter.txt.template (original)
+++ lucene/nutch/trunk/conf/crawl-urlfilter.txt.template Mon Aug  8 12:48:17 2005
@@ -12,7 +12,7 @@
 -^(file|ftp|mailto):
 
 # skip image and other suffixes we can't yet parse
--\.(gif|GIF|jpg|JPG|ico|ICO|css|sit|eps|wmf|zip|ppt|mpg|xls|gz|rpm|tgz|mov|MOV|exe)$
+-\.(gif|GIF|jpg|JPG|ico|ICO|css|sit|eps|wmf|zip|ppt|mpg|xls|gz|rpm|tgz|mov|MOV|exe|png|PNG|pdf|PDF)$
 
 # skip URLs containing certain characters as probable queries, etc.
 -[?*!@=]


Reply via email to