This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit b7246b4f3ff02e7358cac79283e76c91fe046663
Author: Colm O hEigeartaigh <[email protected]>
AuthorDate: Tue Jun 16 17:54:34 2020 +0100

    Disable external DTD + Stylesheets with the TransformerFactory (#276)
    
    Sorry for our delay.  Thank you!
---
 .../java/org/apache/tika/config/TikaConfigSerializer.java  |  4 +---
 .../main/java/org/apache/tika/mime/MimeTypesReader.java    |  8 +++-----
 .../main/java/org/apache/tika/utils/XMLReaderUtils.java    | 14 ++++++++++++++
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
index c67b03b..e59b29e 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
@@ -27,7 +27,6 @@ import java.util.concurrent.ExecutorService;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
@@ -92,8 +91,7 @@ public class TikaConfigSerializer {
         // TODO Service Loader section
 
         // now write
-        TransformerFactory transformerFactory = TransformerFactory.newInstance();
-        Transformer transformer = transformerFactory.newTransformer();
+        Transformer transformer = XMLReaderUtils.getTransformer();
         transformer.setOutputProperty(OutputKeys.INDENT, "yes");
         transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
         transformer.setOutputProperty(OutputKeys.ENCODING, charset.name());
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
index ff266fe..12ac4ec 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
@@ -22,7 +22,6 @@ import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerException;
-import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.sax.SAXResult;
 import java.io.ByteArrayInputStream;
@@ -38,6 +37,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.xml.sax.Attributes;
 import org.xml.sax.InputSource;
@@ -150,11 +150,9 @@ public class MimeTypesReader extends DefaultHandler implements MimeTypesReaderMe
 
     public void read(Document document) throws MimeTypeException {
         try {
-            TransformerFactory factory = TransformerFactory.newInstance();
-            factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
-            Transformer transformer = factory.newTransformer();
+            Transformer transformer = XMLReaderUtils.getTransformer();
             transformer.transform(new DOMSource(document), new SAXResult(this));
-        } catch (TransformerException e) {
+        } catch (TransformerException | TikaException e) {
             throw new MimeTypeException("Failed to parse type registry", e);
         }
     }
diff --git a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
index 9e9f855..291ba2b 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
@@ -333,6 +333,18 @@ public class XMLReaderUtils implements Serializable {
         return factory;
     }
 
+    private static void trySetTransformerAttribute(TransformerFactory transformerFactory, String attribute, String value) {
+        try {
+            transformerFactory.setAttribute(attribute, value);
+        } catch (SecurityException e) {
+            throw e;
+        } catch (Exception e) {
+            LOG.warn("Transformer Attribute unsupported: {}", attribute, e);
+        } catch (AbstractMethodError ame) {
+            LOG.warn("Cannot set Transformer attribute because outdated XML parser in classpath: {}", attribute, ame);
+        }
+    }
+
     private static void trySetSAXFeature(SAXParserFactory saxParserFactory, String feature, boolean enabled) {
         try {
             saxParserFactory.setFeature(feature, enabled);
@@ -377,6 +389,8 @@ public class XMLReaderUtils implements Serializable {
         try {
             TransformerFactory transformerFactory = TransformerFactory.newInstance();
             transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+            trySetTransformerAttribute(transformerFactory, XMLConstants.ACCESS_EXTERNAL_DTD, "");
+            trySetTransformerAttribute(transformerFactory, XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
             return transformerFactory.newTransformer();
         } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
             throw new TikaException("Transformer not available", e);

Reply via email to