Author: thorsten
Date: Fri Feb 15 22:23:28 2013
New Revision: 1446791
URL: http://svn.apache.org/r1446791
Log:
Reduced the exception types we throw and refactored the code to throw only
DroidsException.
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Fetcher.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaParser.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileFetcher.java
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
Fri Feb 15 22:23:28 2013
@@ -16,6 +16,10 @@
*/
package org.apache.droids.core;
+import java.io.IOException;
+import java.util.Queue;
+import java.util.concurrent.TimeUnit;
+
import org.apache.droids.helper.factories.FilterFactory;
import org.apache.droids.helper.factories.HandlerFactory;
import org.apache.droids.helper.factories.ParserFactory;
@@ -23,10 +27,6 @@ import org.apache.droids.taskmaster.Mult
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.Queue;
-import java.util.concurrent.TimeUnit;
-
/**
* Manage common tasks in standard Droids
*
@@ -138,7 +138,7 @@ public abstract class AbstractDroid<T ex
}
@Override
- public void load(T task) throws DroidsException, IOException {
+ public void load(T task) throws DroidsException {
logger.debug("load task: " + task.getURI());
if (this.fetcher == null) {
throw new DroidsException("Fetcher not set");
@@ -148,20 +148,24 @@ public abstract class AbstractDroid<T ex
}
@Override
- public void parse(T task) throws DroidsException, IOException {
+ public void parse(T task) throws DroidsException {
logger.debug("parse task: " + task.getURI());
this.parserFactory.parse(task);
}
@Override
- public void handle(T task) throws DroidsException, IOException {
+ public void handle(T task) throws DroidsException {
logger.debug("handle task: " + task.getURI());
this.handlerFactory.handle(task);
}
@Override
- public void finish(T task) throws DroidsException, IOException {
- task.getContentEntity().close();
+ public void finish(T task) throws DroidsException {
+ try {
+ task.getContentEntity().close();
+ } catch (IOException e) {
+ throw new DroidsException(e);
+ }
logger.debug("finished task: " + task.getURI());
}
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
Fri Feb 15 22:23:28 2013
@@ -16,7 +16,6 @@
*/
package org.apache.droids.core;
-import java.io.IOException;
/**
* Interface for a droid. Droid can be seen as a "project manger" that
delegates
@@ -58,21 +57,21 @@ public interface Droid<T extends Task> {
*
* @param task the task for loading the content
*/
- public void load(T task) throws DroidsException, IOException;
+ public void load(T task) throws DroidsException;
/**
* Parse the task with the defined Parsers.
*
* @param task the task to parse
*/
- public void parse(T task) throws DroidsException, IOException;
+ public void parse(T task) throws DroidsException;
/**
* Handle the task.
*
* @param task the task to handle
*/
- public void handle(T task) throws DroidsException, IOException;
+ public void handle(T task) throws DroidsException;
/**
* Finish the task.
@@ -80,7 +79,7 @@ public interface Droid<T extends Task> {
*
* @param task the task to handle
*/
- public void finish(T task) throws DroidsException, IOException;
+ public void finish(T task) throws DroidsException;
/**
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Fetcher.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Fetcher.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Fetcher.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Fetcher.java
Fri Feb 15 22:23:28 2013
@@ -33,7 +33,7 @@ public interface Fetcher<T extends Task>
* @return true if we can request the task. false if we are forbidden.
* @throws IOException
*/
- public boolean isAllowed(T task) throws IOException;
+ public boolean isAllowed(T task) throws DroidsException;
/**
* Return the content entity represent of the url
@@ -41,6 +41,6 @@ public interface Fetcher<T extends Task>
* @param task the task we want to retrieve.
* @throws IOException
*/
- public void fetch(T task) throws IOException;
+ public void fetch(T task) throws DroidsException;
}
\ No newline at end of file
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Parser.java
Fri Feb 15 22:23:28 2013
@@ -16,8 +16,6 @@
*/
package org.apache.droids.core;
-import java.io.IOException;
-
/**
* Simple parser that is only parsing the data of the task.
*
@@ -29,5 +27,5 @@ public interface Parser<T extends Task>
*
* @param task the task that correspond to the stream
*/
- public void parse(T task) throws DroidsException, IOException;
+ public void parse(T task) throws DroidsException;
}
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/HandlerFactory.java
Fri Feb 15 22:23:28 2013
@@ -47,9 +47,13 @@ public class HandlerFactory {
* @param task the task to handle
*/
public void handle(Task task)
- throws DroidsException, IOException {
+ throws DroidsException {
for (Handler handler : handlers) {
- handler.handle(task);
+ try {
+ handler.handle(task);
+ } catch (IOException e) {
+ throw new DroidsException(e);
+ }
}
}
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/helper/factories/ParserFactory.java
Fri Feb 15 22:23:28 2013
@@ -16,14 +16,13 @@
*/
package org.apache.droids.helper.factories;
+import java.util.HashSet;
+import java.util.Set;
+
import org.apache.droids.core.DroidsException;
import org.apache.droids.core.Parser;
import org.apache.droids.core.Task;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
-
/**
* Factory that will lookup a parser by its identifier and return it.
*
@@ -40,7 +39,7 @@ public class ParserFactory {
this.parsers.add(parser);
}
- public void parse(Task task) throws DroidsException, IOException {
+ public void parse(Task task) throws DroidsException {
for (Parser parser : parsers) {
parser.parse(task);
}
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/FileNameParser.java
Fri Feb 15 22:23:28 2013
@@ -20,8 +20,6 @@ import org.apache.droids.core.DroidsExce
import org.apache.droids.core.Parser;
import org.apache.droids.core.Task;
-import java.io.IOException;
-
/**
* Simple Parser implementation extracting the path component from
* the URI of the task.
@@ -35,7 +33,7 @@ public class FileNameParser implements P
@Override
- public void parse(Task task) throws DroidsException, IOException {
+ public void parse(Task task) throws DroidsException {
String path = task.getURI().getPath();
task.getParserData().set(FILENAME,
path.substring(path.lastIndexOf('/') + 1));
}
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/parse/SimpleLinkParser.java
Fri Feb 15 22:23:28 2013
@@ -16,14 +16,6 @@
*/
package org.apache.droids.parse;
-import org.apache.droids.core.DroidsException;
-import org.apache.droids.core.LinkedTask;
-import org.apache.droids.core.Parser;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.slf4j.spi.LocationAwareLogger;
-
-import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Scanner;
@@ -31,6 +23,12 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.droids.core.DroidsException;
+import org.apache.droids.core.LinkedTask;
+import org.apache.droids.core.Parser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* Simple Parser implementation for extraction links from a website using
* the href Attribute of an anchor (a) Element.
@@ -41,7 +39,7 @@ public class SimpleLinkParser implements
Logger logger = LoggerFactory.getLogger(SimpleLinkParser.class);
@Override
- public void parse(LinkedTask task) throws DroidsException, IOException {
+ public void parse(LinkedTask task) throws DroidsException {
logger.info("parse " + task.getURI());
InputStream inStream = task.getContentEntity().getContent();
if (inStream != null) {
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
Fri Feb 15 22:23:28 2013
@@ -16,14 +16,22 @@
*/
package org.apache.droids.fetcher;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+
import org.apache.droids.core.ContentEntity;
+import org.apache.droids.core.DroidsException;
import org.apache.droids.core.Fetcher;
import org.apache.droids.core.LinkedTask;
import org.apache.droids.norobots.ContentLoader;
import org.apache.droids.norobots.HttpClientContentLoader;
import org.apache.droids.norobots.NoRobotClient;
-import org.apache.droids.norobots.NoRobotException;
-import org.apache.http.*;
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.HttpStatus;
+import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.methods.HttpGet;
@@ -33,13 +41,6 @@ import org.apache.http.params.CoreProtoc
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.HashMap;
-import java.util.Map;
-
/**
*
*
@@ -65,7 +66,7 @@ public class CrawlingFetcher implements
}
@Override
- public boolean isAllowed(LinkedTask task) throws IOException {
+ public boolean isAllowed(LinkedTask task) throws DroidsException {
if (forceAllow) {
return forceAllow;
}
@@ -83,7 +84,7 @@ public class CrawlingFetcher implements
NoRobotClient nrc = new NoRobotClient(contentLoader, userAgent);
try {
nrc.parse(baseURI);
- } catch (NoRobotException ex) {
+ } catch (Exception ex) {
logger.error("Failure parsing robots.txt: " + ex.getMessage());
return false;
}
@@ -96,23 +97,28 @@ public class CrawlingFetcher implements
}
@Override
- public void fetch(LinkedTask task) throws IOException {
- HttpGet httpget = new HttpGet(task.getURI());
- HttpResponse response = httpClient.execute(httpget);
- StatusLine statusline = response.getStatusLine();
- if (statusline.getStatusCode() >= HttpStatus.SC_BAD_REQUEST) {
- httpget.abort();
- throw new HttpResponseException(statusline.getStatusCode(),
statusline.getReasonPhrase());
- }
- HttpEntity entity = response.getEntity();
- if (entity != null) {
- ContentEntity contentEntity = task.getContentEntity();
- InputStream instream = entity.getContent();
- contentEntity.setContent(instream);
- Map<String, String> headerData = new HashMap<String, String>();
- for (Header header : response.getAllHeaders()) {
- contentEntity.set(header.getName(), header.getValue());
+ public void fetch(LinkedTask task) throws DroidsException {
+ try {
+ HttpGet httpget = new HttpGet(task.getURI());
+ HttpResponse response = httpClient.execute(httpget);
+ StatusLine statusline = response.getStatusLine();
+ if (statusline.getStatusCode() >= HttpStatus.SC_BAD_REQUEST) {
+ httpget.abort();
+ throw new DroidsException(new
HttpResponseException(statusline.getStatusCode(),
statusline.getReasonPhrase()));
+ }
+ HttpEntity entity = response.getEntity();
+ if (entity != null) {
+ ContentEntity contentEntity = task.getContentEntity();
+ InputStream instream = entity.getContent();
+ contentEntity.setContent(instream);
+ // not used
+ //Map<String, String> headerData = new HashMap<String,
String>();
+ for (Header header : response.getAllHeaders()) {
+ contentEntity.set(header.getName(), header.getValue());
+ }
}
+ } catch (Exception e) {
+ throw new DroidsException(e);
}
}
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaParser.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaParser.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaParser.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-tika/src/main/java/org/apache/droids/tika/TikaParser.java
Fri Feb 15 22:23:28 2013
@@ -59,7 +59,7 @@ public class TikaParser<T extends Task>
protected static final Logger logger =
LoggerFactory.getLogger(TikaParser.class);
@Override
- public void parse(T task) throws DroidsException, IOException {
+ public void parse(T task) throws DroidsException {
// Init Tika objects
org.apache.tika.parser.Parser parser = new AutoDetectParser();
Metadata metadata = new Metadata();
@@ -117,12 +117,14 @@ public class TikaParser<T extends Task>
for (String key : metadata.names()) {
task.getParserData().set(key, metadata.getValues(key));
}
- } catch (SAXException ex) {
- throw new DroidsException("Failure parsing document " +
task.getURI(), ex);
- } catch (TikaException ex) {
+ } catch (Exception ex) {
throw new DroidsException("Failure parsing document " +
task.getURI(), ex);
} finally {
- instream.close();
+ try {
+ instream.close();
+ } catch (IOException e) {
+ throw new DroidsException("Failure closing input stream for
document " + task.getURI(), e);
+ }
}
}
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileFetcher.java
URL:
http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileFetcher.java?rev=1446791&r1=1446790&r2=1446791&view=diff
==============================================================================
---
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileFetcher.java
(original)
+++
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileFetcher.java
Fri Feb 15 22:23:28 2013
@@ -20,9 +20,11 @@ package org.apache.droids.walker;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
+import org.apache.droids.core.DroidsException;
import org.apache.droids.core.Fetcher;
/**
@@ -40,10 +42,14 @@ public class FileFetcher implements Fetc
}
@Override
- public void fetch(FileTask task) throws IOException {
- File file = new File(extractLocation(task.getURI()));
- task.getContentEntity().setContent(new
FileInputStream(task.getFile()));
- task.getContentEntity().setContentLength(file.length());
+ public void fetch(FileTask task) throws DroidsException {
+ try {
+ File file = new File(extractLocation(task.getURI()));
+ task.getContentEntity().setContent(new
FileInputStream(task.getFile()));
+ task.getContentEntity().setContentLength(file.length());
+ } catch (FileNotFoundException e) {
+ throw new DroidsException(e);
+ }
}
/**