vgritsenko 02/01/23 11:06:39
Modified: src/java/org/apache/cocoon/components/crawler
CocoonCrawler.java SimpleCocoonCrawlerImpl.java
src/java/org/apache/cocoon/components/search
LuceneCocoonHelper.java LuceneCocoonIndexer.java
LuceneCocoonPager.java
LuceneIndexContentHandler.java
LuceneXMLIndexer.java
SimpleLuceneCocoonIndexerImpl.java
SimpleLuceneCocoonSearcherImpl.java
SimpleLuceneXMLIndexerImpl.java
Log:
- Add getCountOfHits() to the pager to fill in the gap;
- Organize imports;
- Use '&' instead of '?' to append the view query when requesting a URI that
already has parameters (patch idea by Colin Britton [[EMAIL PROTECTED]])
Revision Changes Path
1.2 +4 -3
xml-cocoon2/src/java/org/apache/cocoon/components/crawler/CocoonCrawler.java
Index: CocoonCrawler.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/CocoonCrawler.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- CocoonCrawler.java 3 Jan 2002 12:31:09 -0000 1.1
+++ CocoonCrawler.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -6,17 +6,18 @@
* the LICENSE file. *
*/
package org.apache.cocoon.components.crawler;
-import java.net.*;
-import java.util.*;
import org.apache.avalon.framework.component.Component;
+import java.net.URL;
+import java.util.Iterator;
+
/**
* The avalon behavioural component interface of crawling.
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Id: CocoonCrawler.java,v 1.1 2002/01/03 12:31:09 giacomo Exp $
+ * @version CVS $Id: CocoonCrawler.java,v 1.2 2002/01/23 19:06:38 vgritsenko Exp
$
*/
public interface CocoonCrawler extends Component
{
1.2 +19 -10
xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java
Index: SimpleCocoonCrawlerImpl.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- SimpleCocoonCrawlerImpl.java 3 Jan 2002 12:31:09 -0000 1.1
+++ SimpleCocoonCrawlerImpl.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -7,13 +7,7 @@
*/
package org.apache.cocoon.components.crawler;
-import java.io.*;
-import java.net.*;
-import java.util.*;
-
-import org.apache.avalon.excalibur.pool.Recyclable;
import org.apache.avalon.framework.activity.Disposable;
-
import org.apache.avalon.framework.configuration.Configurable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
@@ -21,6 +15,8 @@
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.thread.ThreadSafe;
+import org.apache.avalon.excalibur.pool.Recyclable;
+
import org.apache.cocoon.Constants;
import org.apache.cocoon.util.Tokenizer;
@@ -29,11 +25,22 @@
import org.apache.regexp.RE;
import org.apache.regexp.RESyntaxException;
+import java.io.InputStream;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.ArrayList;
+
/**
* A simple cocoon crawler.
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.1 2002/01/03 12:31:09
giacomo Exp $
+ * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.2 2002/01/23 19:06:38
vgritsenko Exp $
*/
public class SimpleCocoonCrawlerImpl extends AbstractLoggable
implements CocoonCrawler, Configurable, Disposable, Recyclable
@@ -77,7 +84,7 @@
*
* @since
*/
- public final static String LINK_VIEW_QUERY_DEFAULT = "?cocoon-view=links";
+ public final static String LINK_VIEW_QUERY_DEFAULT = "cocoon-view=links";
/**
* Config element name specifying excluding regular expression pattern.
@@ -416,7 +423,9 @@
// get links of url
try {
- URL links_url = new URL(url, url.getPath() + linkViewQuery);
+ URL links_url = new URL(url, url.getPath()
+ + ((url.getPath().indexOf("?") == -1) ? "?" : "&")
+ + linkViewQuery);
URLConnection links_url_connection = links_url.openConnection();
InputStream is = links_url_connection.getInputStream();
BufferedReader br = new BufferedReader(new InputStreamReader(is));
@@ -546,7 +555,7 @@
* </p>
*
* @author <a href="mailto:[EMAIL PROTECTED]>Bernhard Huber</a>
- * @version $Id: SimpleCocoonCrawlerImpl.java,v 1.1 2002/01/03 12:31:09
giacomo Exp $
+ * @version $Id: SimpleCocoonCrawlerImpl.java,v 1.2 2002/01/23 19:06:38
vgritsenko Exp $
*/
public static class CocoonCrawlerIterator implements Iterator
{
1.2 +6 -9
xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonHelper.java
Index: LuceneCocoonHelper.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonHelper.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LuceneCocoonHelper.java 3 Jan 2002 12:31:13 -0000 1.1
+++ LuceneCocoonHelper.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -9,20 +9,21 @@
import java.io.File;
import java.io.IOException;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.*;
-import org.apache.lucene.store.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
/**
* This class encapsulates some helper methods.
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Id: LuceneCocoonHelper.java,v 1.1 2002/01/03 12:31:13 giacomo
Exp $
+ * @version CVS $Id: LuceneCocoonHelper.java,v 1.2 2002/01/23 19:06:38
vgritsenko Exp $
*/
public class LuceneCocoonHelper
{
-
/**
*Gets the directory attribute of the LuceneCocoonHelper class
*
@@ -37,7 +38,6 @@
return fsDirectory;
}
-
/**
*Gets the analyzer attribute of the LuceneCocoonHelper class
*
@@ -55,7 +55,6 @@
return analyzer;
}
-
/**
*Gets the indexReader attribute of the LuceneCocoonHelper class
*
@@ -69,7 +68,6 @@
return reader;
}
-
/**
*Gets the indexWriter attribute of the LuceneCocoonHelper class
*
@@ -84,6 +82,5 @@
IndexWriter writer = new IndexWriter(index, analyzer, create);
return writer;
}
-
}
1.2 +1 -3
xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonIndexer.java
Index: LuceneCocoonIndexer.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonIndexer.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LuceneCocoonIndexer.java 3 Jan 2002 12:31:13 -0000 1.1
+++ LuceneCocoonIndexer.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -19,11 +19,10 @@
* The avalon behavioural component interface of an indexer.
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Id: LuceneCocoonIndexer.java,v 1.1 2002/01/03 12:31:13 giacomo
Exp $
+ * @version CVS $Id: LuceneCocoonIndexer.java,v 1.2 2002/01/23 19:06:38
vgritsenko Exp $
*/
public interface LuceneCocoonIndexer extends Component
{
-
/**
*Description of the Field
*
@@ -53,4 +52,3 @@
void index(Directory index, boolean create, URL base_url)
throws ProcessingException;
}
-
1.2 +15 -8
xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonPager.java
Index: LuceneCocoonPager.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneCocoonPager.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LuceneCocoonPager.java 3 Jan 2002 12:31:13 -0000 1.1
+++ LuceneCocoonPager.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -9,21 +9,19 @@
import java.io.File;
import java.io.IOException;
+import java.util.ListIterator;
+import java.util.ArrayList;
+import java.util.NoSuchElementException;
-import java.util.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
-import org.apache.lucene.index.*;
import org.apache.lucene.search.Hits;
-import org.apache.lucene.store.*;
-
-// implementtion of ListIterator
/**
* This class should help you to manage paging of hits.
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Id: LuceneCocoonPager.java,v 1.1 2002/01/03 12:31:13 giacomo
Exp $
+ * @version CVS $Id: LuceneCocoonPager.java,v 1.2 2002/01/23 19:06:38 vgritsenko
Exp $
*/
public class LuceneCocoonPager implements ListIterator
{
@@ -127,6 +125,16 @@
/**
+ * Get count of hits
+ *
+ * @return The count of hits
+ * @since
+ */
+ public int getCountOfHits() {
+ return hits.length();
+ }
+
+ /**
* Get count of hits displayed per single page
*
* @return The countOfHitsPerPage value
@@ -136,7 +144,6 @@
return this.countOfHitsPerPage;
}
-
/**
* Caluclate count of pages for displaying all hits
*
@@ -303,7 +310,7 @@
* A helper class encapsulating found document, and its score
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Id: LuceneCocoonPager.java,v 1.1 2002/01/03 12:31:13
giacomo Exp $
+ * @version CVS $Id: LuceneCocoonPager.java,v 1.2 2002/01/23 19:06:38
vgritsenko Exp $
*/
public static class HitWrapper
{
1.2 +4 -6
xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneIndexContentHandler.java
Index: LuceneIndexContentHandler.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneIndexContentHandler.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LuceneIndexContentHandler.java 3 Jan 2002 12:31:13 -0000 1.1
+++ LuceneIndexContentHandler.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -6,17 +6,17 @@
* the LICENSE file. *
*/
package org.apache.cocoon.components.search;
+
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-
import java.util.Stack;
-import org.apache.lucene.document.DateField;
+import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.xml.sax.Attributes;
+import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
@@ -27,7 +27,7 @@
* Parse XML and generate lucene document(s)
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Id: LuceneIndexContentHandler.java,v 1.1 2002/01/03 12:31:13
giacomo Exp $
+ * @version CVS $Id: LuceneIndexContentHandler.java,v 1.2 2002/01/23 19:06:38
vgritsenko Exp $
*/
public class LuceneIndexContentHandler implements ContentHandler
{
@@ -222,6 +222,4 @@
* @since
*/
public void startPrefixMapping(String prefix, String uri) { }
-
}
-
1.2 +3 -2
xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneXMLIndexer.java
Index: LuceneXMLIndexer.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/LuceneXMLIndexer.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LuceneXMLIndexer.java 3 Jan 2002 12:31:13 -0000 1.1
+++ LuceneXMLIndexer.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -6,11 +6,13 @@
* the LICENSE file. *
*/
package org.apache.cocoon.components.search;
+
import java.net.URL;
import java.util.Iterator;
import java.util.List;
import org.apache.avalon.framework.component.Component;
+
import org.apache.cocoon.ProcessingException;
/**
@@ -31,7 +33,7 @@
* </p>
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Id: LuceneXMLIndexer.java,v 1.1 2002/01/03 12:31:13 giacomo Exp
$
+ * @version CVS $Id: LuceneXMLIndexer.java,v 1.2 2002/01/23 19:06:38 vgritsenko
Exp $
*/
public interface LuceneXMLIndexer extends Component
{
@@ -129,4 +131,3 @@
*/
void build(URL url) throws ProcessingException;
}
-
1.2 +4 -2
xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonIndexerImpl.java
Index: SimpleLuceneCocoonIndexerImpl.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonIndexerImpl.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- SimpleLuceneCocoonIndexerImpl.java 3 Jan 2002 12:31:13 -0000 1.1
+++ SimpleLuceneCocoonIndexerImpl.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -46,7 +46,7 @@
* </p>
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Revision: 1.1 $ $Date: 2002/01/03 12:31:13 $
+ * @version CVS $Revision: 1.2 $ $Date: 2002/01/23 19:06:38 $
*/
public class SimpleLuceneCocoonIndexerImpl extends AbstractLoggable
implements LuceneCocoonIndexer, Configurable, Composable, Disposable
@@ -198,8 +198,10 @@
// skip urls using different host, or port than host,
// or port of base url
- System.out.println("Skipping carwling URL " +
crawl_url.toString() +
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("Skipping crawling URL " +
crawl_url.toString() +
" as base_url is " + base_url.toString());
+ }
continue;
}
1.2 +3 -6
xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonSearcherImpl.java
Index: SimpleLuceneCocoonSearcherImpl.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneCocoonSearcherImpl.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- SimpleLuceneCocoonSearcherImpl.java 3 Jan 2002 12:31:13 -0000 1.1
+++ SimpleLuceneCocoonSearcherImpl.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -14,22 +14,19 @@
import org.apache.avalon.excalibur.pool.Recyclable;
import org.apache.avalon.framework.activity.Disposable;
-
import org.apache.avalon.framework.component.ComponentException;
import org.apache.avalon.framework.component.ComponentManager;
import org.apache.avalon.framework.component.Composable;
-
import org.apache.avalon.framework.configuration.Configurable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
-
import org.apache.avalon.framework.logger.AbstractLoggable;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.util.ClassUtils;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateField;
-
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
@@ -57,7 +54,7 @@
* </p>
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Revision: 1.1 $ $Date: 2002/01/03 12:31:13 $
+ * @version CVS $Revision: 1.2 $ $Date: 2002/01/23 19:06:38 $
*/
public class SimpleLuceneCocoonSearcherImpl extends AbstractLoggable
implements LuceneCocoonSearcher, Configurable, Composable, Disposable,
Recyclable
@@ -218,8 +215,8 @@
this.directory = directory;
if (indexReaderCache != null) {
indexReaderCache.close();
+ indexReaderCache = null;
}
- indexReaderCache = null;
}
1.2 +5 -3
xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java
Index: SimpleLuceneXMLIndexerImpl.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- SimpleLuceneXMLIndexerImpl.java 3 Jan 2002 12:31:13 -0000 1.1
+++ SimpleLuceneXMLIndexerImpl.java 23 Jan 2002 19:06:38 -0000 1.2
@@ -54,7 +54,7 @@
* A simple class building lucene documents from xml content.
*
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
- * @version CVS $Revision: 1.1 $ $Date: 2002/01/03 12:31:13 $
+ * @version CVS $Revision: 1.2 $ $Date: 2002/01/23 19:06:38 $
*/
public class SimpleLuceneXMLIndexerImpl extends AbstractLoggable
implements LuceneXMLIndexer, Configurable, Composable
@@ -80,7 +80,7 @@
*
* @since
*/
- final String CONTENT_QUERY = "?cocoon-view=content";
+ final String CONTENT_QUERY = "cocoon-view=content";
/**
* set of allowed content types
@@ -163,7 +163,9 @@
throws ProcessingException {
try {
- URL contentURL = new URL(url, url.getPath() + CONTENT_QUERY);
+ URL contentURL = new URL(url, url.getPath()
+ + ((url.getPath().indexOf("?") == -1) ? "?" : "&")
+ + CONTENT_QUERY);
URLConnection contentURLConnection = contentURL.openConnection();
String contentType = contentURLConnection.getContentType();
if (contentType != null &&
----------------------------------------------------------------------
In case of troubles, e-mail: [EMAIL PROTECTED]
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]