This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
commit b7246b4f3ff02e7358cac79283e76c91fe046663 Author: Colm O hEigeartaigh <[email protected]> AuthorDate: Tue Jun 16 17:54:34 2020 +0100 Disable external DTD + Stylesheets with the TransformerFactory (#276) Sorry for our delay. Thank you! --- .../java/org/apache/tika/config/TikaConfigSerializer.java | 4 +--- .../main/java/org/apache/tika/mime/MimeTypesReader.java | 8 +++----- .../main/java/org/apache/tika/utils/XMLReaderUtils.java | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java index c67b03b..e59b29e 100644 --- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java +++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java @@ -27,7 +27,6 @@ import java.util.concurrent.ExecutorService; import javax.xml.parsers.DocumentBuilder; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; @@ -92,8 +91,7 @@ public class TikaConfigSerializer { // TODO Service Loader section // now write - TransformerFactory transformerFactory = TransformerFactory.newInstance(); - Transformer transformer = transformerFactory.newTransformer(); + Transformer transformer = XMLReaderUtils.getTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); transformer.setOutputProperty(OutputKeys.ENCODING, charset.name()); diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java index ff266fe..12ac4ec 100644 --- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java +++ 
b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java @@ -22,7 +22,6 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.sax.SAXResult; import java.io.ByteArrayInputStream; @@ -38,6 +37,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.tika.exception.TikaException; +import org.apache.tika.utils.XMLReaderUtils; import org.w3c.dom.Document; import org.xml.sax.Attributes; import org.xml.sax.InputSource; @@ -150,11 +150,9 @@ public class MimeTypesReader extends DefaultHandler implements MimeTypesReaderMe public void read(Document document) throws MimeTypeException { try { - TransformerFactory factory = TransformerFactory.newInstance(); - factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - Transformer transformer = factory.newTransformer(); + Transformer transformer = XMLReaderUtils.getTransformer(); transformer.transform(new DOMSource(document), new SAXResult(this)); - } catch (TransformerException e) { + } catch (TransformerException | TikaException e) { throw new MimeTypeException("Failed to parse type registry", e); } } diff --git a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java index 9e9f855..291ba2b 100644 --- a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java +++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java @@ -333,6 +333,18 @@ public class XMLReaderUtils implements Serializable { return factory; } + private static void trySetTransformerAttribute(TransformerFactory transformerFactory, String attribute, String value) { + try { + transformerFactory.setAttribute(attribute, value); + } catch (SecurityException e) 
{ + throw e; + } catch (Exception e) { + LOG.warn("Transformer Attribute unsupported: {}", attribute, e); + } catch (AbstractMethodError ame) { + LOG.warn("Cannot set Transformer attribute because outdated XML parser in classpath: {}", attribute, ame); + } + } + private static void trySetSAXFeature(SAXParserFactory saxParserFactory, String feature, boolean enabled) { try { saxParserFactory.setFeature(feature, enabled); @@ -377,6 +389,8 @@ public class XMLReaderUtils implements Serializable { try { TransformerFactory transformerFactory = TransformerFactory.newInstance(); transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + trySetTransformerAttribute(transformerFactory, XMLConstants.ACCESS_EXTERNAL_DTD, ""); + trySetTransformerAttribute(transformerFactory, XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); return transformerFactory.newTransformer(); } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) { throw new TikaException("Transformer not available", e);
