This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch 1290
in repository https://gitbox.apache.org/repos/asf/incubator-stormcrawler.git

commit cef1869a8f6b90cc82b2dc1d294c4cfcec886606
Author: Richard Zowalla <[email protected]>
AuthorDate: Mon Sep 9 15:02:30 2024 +0200

    Fix #1290 "Add close/cleanup method to ParseFilters"
---
 .../main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java   | 7 +++++++
 .../main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java  | 7 +++++++
 .../java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java     | 7 +++++++
 .../src/main/java/org/apache/stormcrawler/parse/ParseFilter.java | 9 +++++++++
 .../main/java/org/apache/stormcrawler/parse/ParseFilters.java    | 7 +++++++
 .../src/main/java/org/apache/stormcrawler/tika/ParserBolt.java   | 7 +++++++
 6 files changed, 44 insertions(+)

diff --git a/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java b/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java
index d1dde17c..07b4a7ad 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java
@@ -240,4 +240,11 @@ public class FeedParserBolt extends StatusEmitterBolt {
         super.declareOutputFields(declarer);
         declarer.declare(new Fields("url", "content", "metadata"));
     }
+
+    @Override
+    public void cleanup() {
+        if(parseFilters != null) {
+            parseFilters.cleanup();
+        }
+    }
 }
diff --git a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java
index 44e994ef..abb3a96e 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java
@@ -538,4 +538,11 @@ public class JSoupParserBolt extends StatusEmitterBolt {
 
         return new LinkedList<>(outlinks.values());
     }
+
+    @Override
+    public void cleanup() {
+        if(parseFilters != null) {
+            parseFilters.cleanup();
+        }
+    }
 }
diff --git a/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java b/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java
index f8191e8d..011f0c3b 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java
@@ -382,4 +382,11 @@ public class SiteMapParserBolt extends StatusEmitterBolt {
         int position = Bytes.indexOf(beginning, clue);
         return position != -1;
     }
+
+    @Override
+    public void cleanup() {
+        if(parseFilters != null) {
+            parseFilters.cleanup();
+        }
+    }
 }
diff --git a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilter.java b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilter.java
index 2495a8ce..6da55bb7 100644
--- a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilter.java
+++ b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilter.java
@@ -16,6 +16,7 @@
  */
 package org.apache.stormcrawler.parse;
 
+import org.apache.storm.task.IBolt;
 import org.apache.stormcrawler.util.AbstractConfigurable;
 import org.w3c.dom.DocumentFragment;
 
@@ -48,4 +49,12 @@ public abstract class ParseFilter extends AbstractConfigurable {
     public boolean needsDOM() {
         return false;
     }
+
+    /**
+     * Might be used to clean any resources associated with this {@link ParseFilter}.
+     * See {@link IBolt#cleanup()} for more details.
+     */
+    public void cleanup() {
+        //nothing to do here
+    }
 }
diff --git a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java
index e2a0723e..864fdbc6 100644
--- a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java
+++ b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java
@@ -146,6 +146,13 @@ public class ParseFilters extends ParseFilter implements JSONResource {
         }
     }
 
+    @Override
+    public void cleanup() {
+        for (ParseFilter filter : filters) {
+            filter.cleanup();
+        }
+    }
+
     /**
      * * Used for quick testing + debugging
      *
diff --git a/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java b/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java
index 94204039..51cfd1ba 100644
--- a/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java
+++ b/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java
@@ -422,4 +422,11 @@ public class ParserBolt extends BaseRichBolt {
         }
         return new ArrayList<>(outlinks.values());
     }
+
+    @Override
+    public void cleanup() {
+        if(parseFilters != null) {
+            parseFilters.cleanup();
+        }
+    }
 }

Reply via email to