This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4338
in repository https://gitbox.apache.org/repos/asf/tika.git

commit bdd38ef7901ae4786c48ae2fe1666d99dd11a4bb
Author: tallison <[email protected]>
AuthorDate: Fri Oct 25 09:28:30 2024 -0400

    TIKA-4338 -- remove tag soup everywhere else -- revert MatchingContentHandler and update tika-bundle-standard
---
 tika-bundles/tika-bundle-standard/pom.xml                             | 2 +-
 .../main/java/org/apache/tika/sax/xpath/MatchingContentHandler.java   | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tika-bundles/tika-bundle-standard/pom.xml b/tika-bundles/tika-bundle-standard/pom.xml
index 3bc8cec76..1f47b5f50 100644
--- a/tika-bundles/tika-bundle-standard/pom.xml
+++ b/tika-bundles/tika-bundle-standard/pom.xml
@@ -174,7 +174,7 @@
               jackcess|
               jackcess-encrypt|
               commons-lang3|
-              tagsoup|
+              jsoup|
               asm|
               juniversalchardet|
               vorbis-java-core|
diff --git a/tika-core/src/main/java/org/apache/tika/sax/xpath/MatchingContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/xpath/MatchingContentHandler.java
index 0648098d1..831611c06 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/xpath/MatchingContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/xpath/MatchingContentHandler.java
@@ -69,6 +69,10 @@ public class MatchingContentHandler extends ContentHandlerDecorator {
         if (matcher.matchesElement()) {
             super.endElement(uri, localName, name);
         }
+        // this was originally added for tagsoup, but we need it generally
+        if (!matchers.isEmpty()) {
+            matcher = matchers.removeFirst();
+        }
     }
 
     public void characters(char[] ch, int start, int length) throws SAXException {

Reply via email to