Author: rfrovarp
Date: Tue Mar 8 15:58:45 2011
New Revision: 1079404
URL: http://svn.apache.org/viewvc?rev=1079404&view=rev
Log:
Finish up applying patch for DROIDS-122.
Thanks to Jeremy Arnold for the patch.
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/StandaloneCrawlerController.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/HtmlElementLinkExtractor.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/AbstractFetcher.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/DefaultFetcherFactory.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/HttpFetcher.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/appengine/AppEngineFetcher.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/http/CrawlerHttpClient.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/DepthFilter.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StateFilter.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StatsFilter.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/IncludeFilter.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/RefererFilter.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/HttpHeaderFilter.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/LinkAttributeFilter.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/link/NoRepeatFilter.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/AbstractParser.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/DefaultParserFactory.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/JerichoHtmlParser.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/NekoHtmlParser.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/SAXElementParser.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/StAXElementParser.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/CrawlerExecutorService.java
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/WeightComparator.java
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultRejectedExecutionHandler.java
Tue Mar 8 15:58:45 2011
@@ -25,6 +25,7 @@ import java.util.concurrent.ThreadPoolEx
public class DefaultRejectedExecutionHandler implements
RejectedExecutionHandler{
static Log log = LogFactory.getLog(DefaultRejectedExecutionHandler.class);
+ @Override
public void rejectedExecution(Runnable r, ThreadPoolExecutor executor){
log.warn("rejectedExecution() - runnable: " + r + ", threadPoolExecutor: " + executor);
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/core/thread/DefaultThreadFactory.java
Tue Mar 8 15:58:45 2011
@@ -25,6 +25,7 @@ public class DefaultThreadFactory implem
"droids");
AtomicInteger threadNumber = new AtomicInteger(1);
+ @Override
public Thread newThread(Runnable r){
return new Thread(threadGroup, r, "droids-" +
threadNumber.getAndIncrement(), 0);
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerController.java
Tue Mar 8 15:58:45 2011
@@ -60,27 +60,33 @@ public abstract class AbstractCrawlerCon
}
}
+ @Override
public boolean addFilter(LinkFilter<T> filter){
if (this.filters == null) this.filters = new
ArrayList<LinkFilter<T>>();
return this.filters.add(filter);
}
+ @Override
public boolean removeFilter(LinkFilter<T> filter){
return this.filters != null && this.filters.remove(filter);
}
+ @Override
public void setFilters(List<LinkFilter<T>> filters){
this.filters = filters;
}
+ @Override
public List<LinkFilter<T>> getFilters(){
return filters;
}
+ @Override
public boolean hasFilter(){
return filters != null && filters.size() > 0;
}
+ @Override
public Queue<T> getQueue(){
return queue;
}
@@ -97,6 +103,7 @@ public abstract class AbstractCrawlerCon
this.filterComparator = filterComparator;
}
+ @Override
public CrawlerService<T> getCrawlerService(){
return crawlerService;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerService.java
Tue Mar 8 15:58:45 2011
@@ -30,6 +30,7 @@ public abstract class AbstractCrawlerSer
/**
* Distributed Crawler Service shall override this method if it is not one instance per host.
*/
+ @Override
public String getNode(){
try{
return InetAddress.getLocalHost().getHostName();
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/AbstractCrawlerWorker.java
Tue Mar 8 15:58:45 2011
@@ -69,23 +69,28 @@ public abstract class AbstractCrawlerWor
}
+ @Override
public boolean addFilter(LinkFilter<T> filter){
if (filters == null) filters = new ArrayList<LinkFilter<T>>();
return filters.add(filter);
}
+ @Override
public boolean removeFilter(LinkFilter<T> filter){
return filters.remove(filter);
}
+ @Override
public void setFilters(List<LinkFilter<T>> filters){
this.filters = filters;
}
+ @Override
public List<LinkFilter<T>> getFilters(){
return filters;
}
+ @Override
public boolean hasFilter(){
return filters != null && filters.size() > 0;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlerWorker.java
Tue Mar 8 15:58:45 2011
@@ -29,6 +29,7 @@ public class CrawlerWorker<T extends Lin
@Autowired(required = false) protected CrawlerService<T> crawlerService;
protected int count = 0;
+ @Override
public void run(){
T link = null;
try{
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/Link.java
Tue Mar 8 15:58:45 2011
@@ -70,6 +70,7 @@ public class Link extends HashMap<String
return id;
}
+ @Override
public String toString(){
StringBuilder out = new StringBuilder();
out.append("Link ( ").append("url: " + LogUtils.toString(url));
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/LocalCrawlerService.java
Tue Mar 8 15:58:45 2011
@@ -53,8 +53,10 @@ public class LocalCrawlerService<T exten
return this;
}
+ @Override
public String getVersion(){return "0.1";}
+ @Override
public T fetch(T link) throws FetcherException{
if (log.isTraceEnabled()) log.trace("fetch() - link: " + link);
try{
@@ -70,6 +72,7 @@ public class LocalCrawlerService<T exten
}
}
+ @Override
public T parse(T link) throws ParserException{
if (log.isTraceEnabled()) log.trace("parse() - link: " + link);
Fetcher fetcher = link.remove("fetched", Fetcher.class);
@@ -92,6 +95,7 @@ public class LocalCrawlerService<T exten
return link;
}
+ @Override
public T extract(T link) throws ExtractorException{
if (log.isTraceEnabled())
log.trace("extract() - link: " + link + ", extractors.size(): " +
(extractors != null ? extractors.size() : -1) + ", extractors: " + extractors);
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/RoundRobinCrawlerService.java
Tue Mar 8 15:58:45 2011
@@ -34,16 +34,20 @@ public class RoundRobinCrawlerService<T
@Autowired(required = false) List<CrawlerService<T>> crawlerServices;
protected AtomicInteger counter = new AtomicInteger();
+ @Override
public String getVersion(){return "0.1";}
+ @Override
public T fetch(T link) throws FetcherException{
return crawlerServices.get(counter.getAndIncrement() %
crawlerServices.size()).fetch(link);
}
+ @Override
public T parse(T link) throws ParserException{
return crawlerServices.get(counter.getAndIncrement() %
crawlerServices.size()).parse(link);
}
+ @Override
public T extract(T link) throws ExtractorException{
return crawlerServices.get(counter.getAndIncrement() %
crawlerServices.size()).extract(link);
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/StandaloneCrawlerController.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/StandaloneCrawlerController.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/StandaloneCrawlerController.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/StandaloneCrawlerController.java
Tue Mar 8 15:58:45 2011
@@ -40,7 +40,8 @@ public class StandaloneCrawlerController
protected int threads = 1;
public boolean isStarted(){ return started; }
-
+
+ @Override
public void start() throws CrawlerException{
started = true;
if (log.isInfoEnabled())
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/HtmlElementLinkExtractor.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/HtmlElementLinkExtractor.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/HtmlElementLinkExtractor.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/extractor/HtmlElementLinkExtractor.java
Tue Mar 8 15:58:45 2011
@@ -51,7 +51,7 @@ public class HtmlElementLinkExtractor<T
static final Pattern CSS_BG =
Pattern.compile("^.*background-image\\s*:\\s*url\\(\\s*['\"]?([^'\"\\)\\s]*)['\"]?\\s*\\).*",
Pattern.CASE_INSENSITIVE);
//static final Pattern CSS_BG =
Pattern.compile("background-image\\s*:\\s*url\\s*\\(\\s*['\"]?(.*)['\"]?\\s*\\)",
Pattern.CASE_INSENSITIVE);
-
+ @Override
public boolean matches(T link){
if (link.containsKey("parsed") && !(link.get("parsed",
Parser.class).getData() instanceof Map)){ // parser.data must be Map, if existed
return false;
@@ -66,6 +66,7 @@ public class HtmlElementLinkExtractor<T
/**
* parser.data must be Map<String, Map<String, Collection<String>>>
*/
+ @Override
public Set<T> extract(T base, Parser<T, ?> parser){
if (log.isTraceEnabled())
log.trace("extract() - base: " + base + ", parser.getClass(): " +
parser.getClass());
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/AbstractFetcher.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/AbstractFetcher.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/AbstractFetcher.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/AbstractFetcher.java
Tue Mar 8 15:58:45 2011
@@ -57,18 +57,22 @@ public abstract class AbstractFetcher<T
return filters.add(filter);
}
+ @Override
public boolean removeFilter(FetchFilter<T> filter){
return filters != null && filters.remove(filter);
}
+ @Override
public void setFilters(List<FetchFilter<T>> filters){
this.filters = filters;
}
+ @Override
public List<FetchFilter<T>> getFilters(){
return filters;
}
+ @Override
public boolean hasFilter(){
return filters != null && filters.size() > 0;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/DefaultFetcherFactory.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/DefaultFetcherFactory.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/DefaultFetcherFactory.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/DefaultFetcherFactory.java
Tue Mar 8 15:58:45 2011
@@ -58,6 +58,7 @@ public class DefaultFetcherFactory<T ext
}
}
+ @Override
public Fetcher<T> newFetcher(T link){
if (link == null) throw new IllegalArgumentException("link is null");
if (link.containsKey("fetcher"))
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/HttpFetcher.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/HttpFetcher.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/HttpFetcher.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/HttpFetcher.java
Tue Mar 8 15:58:45 2011
@@ -61,17 +61,18 @@ public class HttpFetcher<T extends Link>
return this;
}
+ @Override
public boolean matches(T link){
URI uri = link.getURI();
return "http".equalsIgnoreCase(uri.getScheme()) ||
"https".equalsIgnoreCase(uri.getScheme());
}
-
public Fetcher fetch(T link) throws FetcherException{
if (link.containsKey("fetch.params")) return fetch(link, (Map)
link.get("fetch.params"));
else return fetch(link, null);
}
+ @Override
public Fetcher fetch(T link, Map params) throws FetcherException{
if (log.isTraceEnabled()) log.trace("to fetch - link: " + link + ", params: " + params);
try{
@@ -104,6 +105,7 @@ public class HttpFetcher<T extends Link>
return this.response != null ? this.response : null;
}
+ @Override
public int getStatusCode(){
return this.response != null ?
this.response.getStatusLine().getStatusCode() : -1;
}
@@ -112,29 +114,34 @@ public class HttpFetcher<T extends Link>
return this.entity != null ? this.entity.getContent() : null;
}
+ @Override
public Entity getEntity(){
return this.entity;
}
+ @Override
public Delay<T> getDelay(){
return this.delay;
}
-
+ @Override
public void reset(){
this.entity = null;
this.request = null;
this.response = null;
}
+ @Override
public void addHttpHeader(String key, String value){
request.addHeader(key, value);
}
+ @Override
public void setHttpHeader(String key, String value){
request.setHeader(key, value);
}
+ @Override
public int getWeight(){
return 10;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/appengine/AppEngineFetcher.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/appengine/AppEngineFetcher.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/appengine/AppEngineFetcher.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/appengine/AppEngineFetcher.java
Tue Mar 8 15:58:45 2011
@@ -70,6 +70,7 @@ public class AppEngineFetcher<T extends
protected transient HTTPResponse response;*/
protected Delay delay;
+ @Override
public boolean matches(T link){
if (link == null) return false;
URI uri = link.getURI();
@@ -82,6 +83,7 @@ public class AppEngineFetcher<T extends
else return fetch(link, null);
}
+ @Override
public Fetcher fetch(T link, Map params) throws FetcherException{
if (link == null) return null;
@@ -169,21 +171,24 @@ try{
}*/
}
+ @Override
public int getStatusCode(){
return this.statusCode;
}
-
+ @Override
public void reset(){
this.entity = null;
/*this.request = null;
this.response = null;*/
}
+ @Override
public void addHttpHeader(String key, String value){
//request.addHeader(new HTTPHeader(key, value));
}
+ @Override
public void setHttpHeader(String key, String value){
//request.setHeader(new HTTPHeader(key, value));
}
@@ -196,10 +201,12 @@ try{
return response.getResponseCode();
}*/
+ @Override
public Entity getEntity(){
return entity;
}
+ @Override
public Delay<T> getDelay(){
return this.delay;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/http/CrawlerHttpClient.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/http/CrawlerHttpClient.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/http/CrawlerHttpClient.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/fetcher/http/CrawlerHttpClient.java
Tue Mar 8 15:58:45 2011
@@ -68,6 +68,7 @@ public class CrawlerHttpClient extends D
public CrawlerHttpClient init(){
if (useBuffer){
this.addResponseInterceptor(new HttpResponseInterceptor(){
+ @Override
public void process(HttpResponse httpResponse, HttpContext
httpContext) throws HttpException, IOException{
httpResponse.setEntity(new
BufferedHttpEntity(httpResponse.getEntity()));
}
@@ -76,6 +77,7 @@ public class CrawlerHttpClient extends D
return this;
}
+ @Override
public String toString(){
return super.toString() + " - maxTotalConnections: " +
maxTotalConnections +
", maxConnectionsPerRoute: " + maxConnectionsPerRoute + ",
connectionTimeout: " + connectionTimeout +
@@ -83,7 +85,8 @@ public class CrawlerHttpClient extends D
}
- @Override protected HttpParams createHttpParams(){
+ @Override
+ protected HttpParams createHttpParams(){
HttpParams params = new BasicHttpParams();
ConnManagerParams.setMaxTotalConnections(params, maxTotalConnections);
HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
@@ -99,7 +102,8 @@ public class CrawlerHttpClient extends D
return params;
}
- @Override protected ClientConnectionManager createClientConnectionManager(){
+ @Override
+ protected ClientConnectionManager createClientConnectionManager(){
SchemeRegistry schemeRegistry = new SchemeRegistry();
schemeRegistry.register(new Scheme("http",
PlainSocketFactory.getSocketFactory(), 80));
schemeRegistry.register(new Scheme("https",
SSLSocketFactory.getSocketFactory(), 443));
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/DepthFilter.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/DepthFilter.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/DepthFilter.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/DepthFilter.java
Tue Mar 8 15:58:45 2011
@@ -45,6 +45,7 @@ public class DepthFilter<T extends Link>
this.maxDepth = maxDepth;
}
+ @Override
public String toString(){
return super.toString() + " - maxDepth: " + maxDepth;
}
@@ -53,6 +54,7 @@ public class DepthFilter<T extends Link>
/**
* add "depth" data to any polled link without "depth"
*/
+ @Override
public T polled(T link){
if (!link.containsKey("depth")){
link.put("depth", 0);
@@ -61,7 +63,7 @@ public class DepthFilter<T extends Link>
return link;
}
-
+ @Override
public Set<T> extracted(T base, Extractor<T, ? extends Parser> extractor,
Set<T> links){
if (links == null) return null;
if (log.isTraceEnabled())
@@ -91,6 +93,7 @@ public class DepthFilter<T extends Link>
return links;
}
+ @Override
public Set<T> extractedAll(T base, List<Extractor<T, ? extends Parser>>
extractor, Set<T> links){
if (log.isDebugEnabled()){
for (T link : links){
@@ -102,8 +105,10 @@ public class DepthFilter<T extends Link>
return links;
}
+ @Override
public void completed(T link, Set<T> links){ }
+ @Override
public void failed(T link, Object object){ }
public int getMaxDepth(){
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StateFilter.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StateFilter.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StateFilter.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StateFilter.java
Tue Mar 8 15:58:45 2011
@@ -31,48 +31,56 @@ import java.util.List;
public class StateFilter<T extends Link> implements LinkFilter<T>,
FetchFilter<T>, ParseFilter<T, Object>, ExtractFilter<T>, Weighted{
protected static Log log = LogFactory.getLog(StateFilter.class);
-
+ @Override
public int getWeight(){
return 1000;
}
+ @Override
public T polled(T link){
link.setState(Link.State.POLLED);
if (log.isDebugEnabled()) log.debug("polled() - set link to POLLED - link: " + link);
return link;
}
-
+ @Override
public void requestReady(T link, Fetcher<T> fetcher){
}
+ @Override
public Fetcher<T> fetched(T link, Fetcher<T> fetcher){
link.setState(Link.State.FETCHED);
return fetcher;
}
+ @Override
public void parserReady(T link, Parser<T, Object> parser){
}
+ @Override
public Parser<T, Object> parsed(T link, Parser<T, Object> parser){
link.setState(Link.State.PARSED);
if (log.isDebugEnabled()) log.debug("parsed() - set link to PARSED - link: " + link);
return parser;
}
+ @Override
public Set<T> extracted(T base, Extractor<T, ? extends Parser> tExtractor,
Set<T> links){
return links;
}
+ @Override
public Set<T> extractedAll(T base, List<Extractor<T, ? extends Parser>>
extractors, Set<T> links){
base.setState(Link.State.EXTRACTED);
return links;
}
+ @Override
public void completed(T link, Set<T> links){
link.setState(Link.State.COMPLETED);
}
+ @Override
public void failed(T link, Object object){
//To change body of implemented methods use File | Settings | File Templates.
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StatsFilter.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StatsFilter.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StatsFilter.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/StatsFilter.java
Tue Mar 8 15:58:45 2011
@@ -80,7 +80,7 @@ public class StatsFilter<T extends Link>
final StatsFilter filter = this;
reporter = Executors.newSingleThreadExecutor();
reporter.submit(new Runnable(){
-
+ @Override
public void run(){
while (reportInterval > 0){
try{
@@ -101,7 +101,7 @@ public class StatsFilter<T extends Link>
return this;
}
-
+ @Override
public T polled(T link){
if (link == null) return null;
long count = this.statistics.get("poll.count").incrementAndGet();
@@ -111,10 +111,11 @@ public class StatsFilter<T extends Link>
return link;
}
-
+ @Override
public void requestReady(T link, Fetcher<T> fetcher){
}
+ @Override
public Fetcher<T> fetched(T link, Fetcher<T> fetcher){
long count = this.statistics.get("fetch.count").incrementAndGet();
if (log.isTraceEnabled())
@@ -122,9 +123,11 @@ public class StatsFilter<T extends Link>
return fetcher;
}
+ @Override
public void parserReady(Link link, Parser parser){
}
+ @Override
public Parser parsed(Link link, Parser parser){
long count = this.statistics.get("parse.count").incrementAndGet();
if (log.isTraceEnabled())
@@ -132,6 +135,7 @@ public class StatsFilter<T extends Link>
return parser;
}
+ @Override
public Set<T> extracted(T base, Extractor<T, ? extends Parser> tExtractor,
Set<T> links){
long count = this.statistics.get("extract.count").incrementAndGet();
if (log.isTraceEnabled())
@@ -139,6 +143,7 @@ public class StatsFilter<T extends Link>
return links;
}
+ @Override
public Set<T> extractedAll(T base, List<Extractor<T, ? extends Parser>>
extractors, Set<T> links){
long linkCount = statistics.get("total.extract").addAndGet(links !=
null ? links.size() : 0);
if (log.isTraceEnabled())
@@ -146,6 +151,7 @@ public class StatsFilter<T extends Link>
return links;
}
+ @Override
public void completed(T link, Set<T> links){
Long started = (Long) link.remove("started");
long elapsed = started != null ? System.currentTimeMillis() - started
: -1;
@@ -161,6 +167,7 @@ public class StatsFilter<T extends Link>
", totalSize: " + totalSize / 1024 / 1024 + "Mb, link: " +
link);
}
+ @Override
public void failed(T link, Object object){
link.setState(Link.State.FAILED);
@@ -218,6 +225,7 @@ public class StatsFilter<T extends Link>
/**
* @return -1000 to make this filter be sorted at the end
*/
+ @Override
public int getWeight(){
return weight;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/IncludeFilter.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/IncludeFilter.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/IncludeFilter.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/IncludeFilter.java
Tue Mar 8 15:58:45 2011
@@ -59,7 +59,8 @@ public class IncludeFilter<T extends Lin
else throw new IllegalArgumentException("args collection items must be String or Pattern");
}
}
-
+
+ @Override
public Set<T> extracted(T base, Extractor<T, ? extends Parser> tExtractor,
Set<T> links){
Set<T> result = new HashSet<T>();
for (T link : links){
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/RefererFilter.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/RefererFilter.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/RefererFilter.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/extract/RefererFilter.java
Tue Mar 8 15:58:45 2011
@@ -37,7 +37,8 @@ public class RefererFilter<T extends Lin
* This filter shall be execute after any filter that perform link removal
*
* @return -100
- */
+ */
+ @Override
public int getWeight(){
return weight;
}
@@ -45,11 +46,13 @@ public class RefererFilter<T extends Lin
public void setWeight(int w){
this.weight = w;
}
-
+
+ @Override
public Set<T> extracted(T base, Extractor<T, ? extends Parser> tExtractor,
Set<T> links){
return links;
}
-
+
+ @Override
public Set<T> extractedAll(T base, List<Extractor<T, ? extends Parser>>
extractors, Set<T> links){
for (T extractedLink : links) extractedLink.put("referer",
base.getUrl());
if (log.isTraceEnabled())
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/HttpHeaderFilter.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/HttpHeaderFilter.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/HttpHeaderFilter.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/HttpHeaderFilter.java
Tue Mar 8 15:58:45 2011
@@ -31,7 +31,8 @@ import org.apache.commons.logging.LogFac
public class HttpHeaderFilter<T extends Link> implements FetchFilter<T> {
protected static Log log = LogFactory.getLog(HttpHeaderFilter.class);
-
+
+ @Override
public void requestReady(T link, Fetcher<T> fetcher) {
if (fetcher instanceof HttpHeaderSupport) {
HttpHeaderSupport httpFetcher = (HttpHeaderSupport) fetcher;
@@ -48,7 +49,8 @@ public class HttpHeaderFilter<T extends
}
}
}
-
+
+ @Override
public Fetcher<T> fetched(T link, Fetcher<T> fetcher) {
return fetcher;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/LinkAttributeFilter.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/LinkAttributeFilter.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/LinkAttributeFilter.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/fetch/LinkAttributeFilter.java
Tue Mar 8 15:58:45 2011
@@ -26,9 +26,11 @@ import org.apache.commons.logging.Log;
public class LinkAttributeFilter<T extends Link> implements FetchFilter<T>{
protected static Log log = LogFactory.getLog(LinkAttributeFilter.class);
-
+
+ @Override
public void requestReady(T link, Fetcher<T> fetcher){}
-
+
+ @Override
public Fetcher<T> fetched(T link, Fetcher<T> fetcher){
link.put("statusCode", fetcher.getStatusCode());
link.put("contentType", fetcher.getEntity().getContentType());
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/link/NoRepeatFilter.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/link/NoRepeatFilter.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/link/NoRepeatFilter.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/filter/link/NoRepeatFilter.java
Tue Mar 8 15:58:45 2011
@@ -52,6 +52,7 @@ public class NoRepeatFilter<T extends Li
* Polled links are put in the polledLinks map for checking duplication.
As a queue may contain duplicated link,
* this method also reject links that are polled already.
*/
+ @Override
public T polled(T link){
if (link == null) return null;
boolean repeated = polledLinks.containsKey(link.getId());
@@ -66,8 +67,10 @@ public class NoRepeatFilter<T extends Li
}
}
+ @Override
public void completed(T link, Set<T> links){}
+ @Override
public void failed(T link, Object object){}
/*public Fetcher<T> fetched(T link, Fetcher<T> fetcher) {
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/AbstractParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/AbstractParser.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/AbstractParser.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/AbstractParser.java
Tue Mar 8 15:58:45 2011
@@ -59,24 +59,29 @@ public abstract class AbstractParser<T e
log.debug("parsed and filtered - link.url: " + link.getUrl() + ",
parser: " + parser);
return parser;
}
-
+
+ @Override
public boolean addFilter(ParseFilter<T, D> filter){
if (filters == null) filters = new ArrayList<ParseFilter<T, D>>();
return filters.add(filter);
}
-
+
+ @Override
public boolean removeFilter(ParseFilter<T, D> filter){
return filters != null && filters.remove(filter);
}
-
+
+ @Override
public void setFilters(List<ParseFilter<T, D>> filters){
this.filters = filters;
}
-
+
+ @Override
public List<ParseFilter<T, D>> getFilters(){
return filters;
}
-
+
+ @Override
public boolean hasFilter(){
return filters != null && filters.size() > 0;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/DefaultParserFactory.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/DefaultParserFactory.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/DefaultParserFactory.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/DefaultParserFactory.java
Tue Mar 8 15:58:45 2011
@@ -67,7 +67,8 @@ public class DefaultParserFactory<T exte
log.debug("\tparsers[0].filters: " + (parsers != null &&
parsers.size() > 0 ? parsers.get(0).getFilters() : null));
}
}
-
+
+ @Override
public Parser<T, Object> newParser(T link){
if (link == null) throw new IllegalArgumentException("link is null");
Parser<T, Object> cfg = (Parser<T, Object>) ParamUtils.resolve(link,
"parser", Parser.class, context);
@@ -86,7 +87,8 @@ public class DefaultParserFactory<T exte
log.info("newParser() - cannot find a supported parser - link: " +
link + ", this: " + this.toString());
return null;
}
-
+
+ @Override
public String toString(){
return super.toString() + ", parsers: " + parsers;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/JerichoHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/JerichoHtmlParser.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/JerichoHtmlParser.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/JerichoHtmlParser.java
Tue Mar 8 15:58:45 2011
@@ -34,13 +34,14 @@ public class JerichoHtmlParser<T extends
static Log log = LogFactory.getLog(JerichoHtmlParser.class);
protected Source source;
-
+ @Override
public boolean matches(T link){
if (link == null) return false;
if (!link.containsKey("contentType")) return true; // this is a
default parser for unknown type
return
(String.valueOf(link.get("contentType")).startsWith("text/html"));
}
-
+
+ @Override
public Parser<T, Source> parse(T link, Entity entity, Map<String, Object>
params) throws ParserException{
try{
source = new Source(entity.getContent());
@@ -49,11 +50,13 @@ public class JerichoHtmlParser<T extends
}
return this;
}
-
+
+ @Override
public Source getData(){
return source;
}
-
+
+ @Override
public void reset(){
this.source = null;
}
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/NekoHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/NekoHtmlParser.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/NekoHtmlParser.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/NekoHtmlParser.java
Tue Mar 8 15:58:45 2011
@@ -44,7 +44,8 @@ public class NekoHtmlParser<T extends Li
protected transient SAXParser saxParser;
protected Map<String, String[]> elements;
protected Map<String, Map<String, Set<String>>> data;
-
+
+ @Override
public boolean matches(Link link){
if (link == null) return false;
if (!link.containsKey("contentType")) return true; // this is a
default parser for unknown type
@@ -59,7 +60,8 @@ public class NekoHtmlParser<T extends Li
if (log.isDebugEnabled()) log.debug("init() - elements: " + elements);
return this;
}
-
+
+ @Override
public Parser<T, Map<String, Map<String, Set<String>>>> parse(T link,
Entity entity, Map<String, Object> params) throws ParserException{
if (saxParser == null || elements == null) init();
try{
@@ -76,7 +78,8 @@ public class NekoHtmlParser<T extends Li
throw new ParserException(e);
}
}
-
+
+ @Override
public String toString(){
return "NekoHtmlParser ( data.keySet(): " + ((data != null) ?
data.keySet() : null) + ", elements: " + elements + ", filters: " +
getFilters();
}
@@ -84,10 +87,12 @@ public class NekoHtmlParser<T extends Li
/**
* Map<String, Map<String, Set<String>>>
*/
+ @Override
public Map<String, Map<String, Set<String>>> getData(){
return this.data;
}
-
+
+ @Override
public void reset(){
this.data = null;
this.saxParser = null;
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/SAXElementParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/SAXElementParser.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/SAXElementParser.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/SAXElementParser.java
Tue Mar 8 15:58:45 2011
@@ -47,18 +47,22 @@ public class SAXElementParser<T extends
protected transient SAXParser saxParser;
protected transient SAXParserFactory parserFactory;
protected Map<String, Map<String, Set<String>>> data;
-
- @PostConstruct public Parser<T, Map<String, Map<String, Set<String>>>>
init(){
+
+ @Override
+ @PostConstruct
+ public Parser<T, Map<String, Map<String, Set<String>>>> init(){
if (log.isDebugEnabled()) log.debug("init() - elements: " + elements);
parserFactory = SAXParserFactory.newInstance();
parserFactory.setValidating(false);
return this;
}
-
+
+ @Override
public boolean matches(T link){
return link != null &&
"text/xml".equalsIgnoreCase(String.valueOf(link.get("contentType")));
}
-
+
+ @Override
public Parser<T, Map<String, Map<String, Set<String>>>> parse(T link,
Entity entity, Map<String, Object> params) throws ParserException{
if (elements == null){
log.warn("element is null, skipped - link: " + link + ", entity: "
+ entity);
@@ -84,11 +88,13 @@ public class SAXElementParser<T extends
throw new ParserException(e);
}
}
-
+
+ @Override
public Map<String, Map<String, Set<String>>> getData(){
return this.data;
}
-
+
+ @Override
public void reset(){
this.data = null;
this.currentElement = null;
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/StAXElementParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/StAXElementParser.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/StAXElementParser.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/parser/impl/StAXElementParser.java
Tue Mar 8 15:58:45 2011
@@ -50,11 +50,13 @@ public class StAXElementParser<T extends
inputFactory = XMLInputFactory.newInstance();
return this;
}
-
+
+ @Override
public boolean matches(T link){
return "text/xml".equalsIgnoreCase((String) link.get("contentType"));
}
-
+
+ @Override
public Parser<T, Map<String, Map<String, Set<String>>>> parse(T link,
Entity entity, Map<String, Object> params) throws ParserException{
if (elements == null){
log.warn("parse() - element is null, skipped - link: " + link + ",
entity: " + entity);
@@ -102,11 +104,13 @@ public class StAXElementParser<T extends
throw new ParserException(e);
}
}
-
+
+ @Override
public Map<String, Map<String, Set<String>>> getData(){
return this.data;
}
-
+
+ @Override
public void reset(){
this.data = null;
this.xmlStreamReader = null;
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/CrawlerExecutorService.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/CrawlerExecutorService.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/CrawlerExecutorService.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/CrawlerExecutorService.java
Tue Mar 8 15:58:45 2011
@@ -47,18 +47,21 @@ public class CrawlerExecutorService exte
}
- @Override protected void beforeExecute(Thread t, Runnable r){
+ @Override
+ protected void beforeExecute(Thread t, Runnable r){
if (log.isTraceEnabled()) log.trace("beforeExecute() - t: " + t + ",
this: " + this.toString());
activeWorkers.add(t);
}
- @Override protected void afterExecute(Runnable r, Throwable t){
+ @Override
+ protected void afterExecute(Runnable r, Throwable t){
if (log.isTraceEnabled()) log.trace("afterExecute() - r: " + r + ",
this: " + this.toString());
activeWorkers.remove(Thread.currentThread());
lastCompleted = System.currentTimeMillis();
}
- @Override public String toString(){
+ @Override
+ public String toString(){
StringBuilder out = new StringBuilder();
out.append("CrawlerExecutorService ( ");
out.append("activeCount: ").append(this.getActiveCount());
Modified:
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/WeightComparator.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/WeightComparator.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/WeightComparator.java
(original)
+++
incubator/droids/trunk/droids-crawler/src/main/java/org/apache/droids/crawler/util/WeightComparator.java
Tue Mar 8 15:58:45 2011
@@ -20,6 +20,8 @@ package org.apache.droids.crawler.util;
import java.util.Comparator;
public class WeightComparator implements Comparator {
+
+ @Override
public int compare(Object link1, Object link2) {
int weight1 = link1 instanceof Weighted ? ((Weighted)
link1).getWeight() : 0;
int weight2 = link2 instanceof Weighted ? ((Weighted)
link2).getWeight() : 0;
Modified:
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=1079404&r1=1079403&r2=1079404&view=diff
==============================================================================
---
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
(original)
+++
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
Tue Mar 8 15:58:45 2011
@@ -50,6 +50,7 @@ public class TikaHtmlParser extends Logg
this.elements = elements;
}
+ @Override
public Parse parse(ContentEntity entity, Link link) throws IOException,
DroidsException {
// Init Tika objects
org.apache.tika.parser.Parser parser = new AutoDetectParser();