This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch 1290 in repository https://gitbox.apache.org/repos/asf/incubator-stormcrawler.git
commit cef1869a8f6b90cc82b2dc1d294c4cfcec886606 Author: Richard Zowalla <[email protected]> AuthorDate: Mon Sep 9 15:02:30 2024 +0200 Fix #1290 "Add close/cleanup method to ParseFilters" --- .../main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java | 7 +++++++ .../main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java | 7 +++++++ .../java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java | 7 +++++++ .../src/main/java/org/apache/stormcrawler/parse/ParseFilter.java | 9 +++++++++ .../main/java/org/apache/stormcrawler/parse/ParseFilters.java | 7 +++++++ .../src/main/java/org/apache/stormcrawler/tika/ParserBolt.java | 7 +++++++ 6 files changed, 44 insertions(+) diff --git a/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java b/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java index d1dde17c..07b4a7ad 100644 --- a/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java +++ b/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java @@ -240,4 +240,11 @@ public class FeedParserBolt extends StatusEmitterBolt { super.declareOutputFields(declarer); declarer.declare(new Fields("url", "content", "metadata")); } + + @Override + public void cleanup() { + if(parseFilters != null) { + parseFilters.cleanup(); + } + } } diff --git a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java index 44e994ef..abb3a96e 100644 --- a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java +++ b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java @@ -538,4 +538,11 @@ public class JSoupParserBolt extends StatusEmitterBolt { return new LinkedList<>(outlinks.values()); } + + @Override + public void cleanup() { + if(parseFilters != null) { + parseFilters.cleanup(); + } + } } diff --git a/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java index f8191e8d..011f0c3b 100644 --- a/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java +++ b/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java @@ -382,4 +382,11 @@ public class SiteMapParserBolt extends StatusEmitterBolt { int position = Bytes.indexOf(beginning, clue); return position != -1; } + + @Override + public void cleanup() { + if(parseFilters != null) { + parseFilters.cleanup(); + } + } } diff --git a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilter.java b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilter.java index 2495a8ce..6da55bb7 100644 --- a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilter.java +++ b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilter.java @@ -16,6 +16,7 @@ */ package org.apache.stormcrawler.parse; +import org.apache.storm.task.IBolt; import org.apache.stormcrawler.util.AbstractConfigurable; import org.w3c.dom.DocumentFragment; @@ -48,4 +49,12 @@ public abstract class ParseFilter extends AbstractConfigurable { public boolean needsDOM() { return false; } + + /** + * Might be used to clean up any resources associated with this {@link ParseFilter}. + * See {@link IBolt#cleanup()} for more details. 
+ */ + public void cleanup() { + //nothing to do here + } } diff --git a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java index e2a0723e..864fdbc6 100644 --- a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java +++ b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java @@ -146,6 +146,13 @@ public class ParseFilters extends ParseFilter implements JSONResource { } } + @Override + public void cleanup() { + for (ParseFilter filter : filters) { + filter.cleanup(); + } + } + /** * * Used for quick testing + debugging * diff --git a/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java b/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java index 94204039..51cfd1ba 100644 --- a/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java +++ b/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java @@ -422,4 +422,11 @@ public class ParserBolt extends BaseRichBolt { } return new ArrayList<>(outlinks.values()); } + + @Override + public void cleanup() { + if(parseFilters != null) { + parseFilters.cleanup(); + } + } }
