Author: etnu
Date: Fri Sep 19 06:49:53 2008
New Revision: 697097
URL: http://svn.apache.org/viewvc?rev=697097&view=rev
Log:
Improved performance and security of XML Parsing by...
- Disabling any external entity file references (DTD, entities, xinclude)
- Re-using DocumentBuilderFactory
- Re-using DocumentBuilder when the dom implementation supports it.
The security issues are more substantial than the performance gains, but
anecdotally we saw a 10% drop in memory consumption and an overall redueced
response time. Most importantly, spikes in response times caused by document /
message bundle re-parse are eliminated, and XML processing is now not that
expensive at all.
Much better memory usage and parse times can be realized by upgrading to
xercesImpl 2.8+ (or any parser that supports DocumentBuilder.reset()).
Modified:
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/xml/XmlUtil.java
Modified:
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/xml/XmlUtil.java
URL:
http://svn.apache.org/viewvc/incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/xml/XmlUtil.java?rev=697097&r1=697096&r2=697097&view=diff
==============================================================================
---
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/xml/XmlUtil.java
(original)
+++
incubator/shindig/trunk/java/common/src/main/java/org/apache/shindig/common/xml/XmlUtil.java
Fri Sep 19 06:49:53 2008
@@ -18,6 +18,8 @@
package org.apache.shindig.common.xml;
+import com.google.common.collect.Lists;
+
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
@@ -30,13 +32,19 @@
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
+import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
+import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
+/**
+ * Utility class for simplifying parsing of xml documents. Documents are not
validated, and
+ * loading of external files (xinclude, external entities, DTDs, etc.) are
disabled.
+ */
public class XmlUtil {
private static final Logger LOG = Logger.getLogger(XmlUtil.class.getName());
// Handles xml errors so that they're not logged to stderr.
@@ -53,6 +61,57 @@
}
};
+ private static final List<DocumentBuilder> builderPool =
Lists.newArrayList();
+ private static boolean canUsePooling = false;
+
+ private static final DocumentBuilderFactory builderFactory
+ = DocumentBuilderFactory.newInstance();
+ static {
+ // Disable various insecure and/or expensive options.
+ builderFactory.setValidating(false);
+
+ // Can't disable doctypes entirely because they're usually harmless.
External entity
+ // resolution, however, is both expensive and insecure.
+ try {
+ builderFactory.setAttribute(
+ "http://xml.org/sax/features/external-general-entities", false);
+ } catch (IllegalArgumentException e) {
+ // Not supported by some very old parsers.
+ }
+
+ try {
+ builderFactory.setAttribute(
+ "http://xml.org/sax/features/external-parameter-entities", false);
+ } catch (IllegalArgumentException e) {
+ // Not supported by some very old parsers.
+ }
+
+ try {
+ builderFactory.setAttribute(
+ "http://apache.org/xml/features/nonvalidating/load-external-dtd",
false);
+ } catch (IllegalArgumentException e) {
+ // Only supported by Apache's XML parsers.
+ }
+
+ try {
+ builderFactory.setAttribute(XMLConstants.FEATURE_SECURE_PROCESSING,
true);
+ } catch (IllegalArgumentException e) {
+ // Not supported by older parsers.
+ }
+
+ try {
+ DocumentBuilder builder = builderFactory.newDocumentBuilder();
+ builder.reset();
+ canUsePooling = true;
+ } catch (UnsupportedOperationException e) {
+ // Only supported by newer parsers (xerces 2.8.x+ for instance).
+ canUsePooling = false;
+ } catch (ParserConfigurationException e) {
+ // Only supported by newer parsers (xerces 2.8.x+ for instance).
+ canUsePooling = false;
+ }
+ }
+
private XmlUtil() {}
/**
@@ -192,6 +251,39 @@
}
/**
+ * Fetch a builder from the pool, creating a new one only if necessary.
+ */
+ private static DocumentBuilder getBuilderFromPool() throws
ParserConfigurationException {
+ DocumentBuilder builder;
+ if (canUsePooling) {
+ synchronized (builderPool) {
+ int size = builderPool.size();
+ if (size > 0) {
+ builder = builderPool.remove(size - 1);
+ builder.reset();
+ } else {
+ builder = builderFactory.newDocumentBuilder();
+ }
+ }
+ } else {
+ builder = builderFactory.newDocumentBuilder();
+ }
+ builder.setErrorHandler(errorHandler);
+ return builder;
+ }
+
+ /**
+ * Return a builder to the pool, allowing it to be re-used by future
operations.
+ */
+ private static void returnBuilderToPool(DocumentBuilder builder) {
+ if (canUsePooling) {
+ synchronized (builderPool) {
+ builderPool.add(builder);
+ }
+ }
+ }
+
+ /**
* Attempts to parse the input xml into a single element.
* @param xml
* @return The document object
@@ -199,13 +291,14 @@
*/
public static Element parse(String xml) throws XmlException {
try {
- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ DocumentBuilder builder = getBuilderFromPool();
InputSource is = new InputSource(new StringReader(xml.trim()));
- DocumentBuilder builder = factory.newDocumentBuilder();
- builder.setErrorHandler(errorHandler);
- return builder.parse(is).getDocumentElement();
+ Element element = builder.parse(is).getDocumentElement();
+ returnBuilderToPool(builder);
+ return element;
} catch (SAXParseException e) {
- throw new XmlException(e.getMessage()+" At: ("+e.getLineNumber()+ ','
+e.getColumnNumber()+ ')', e);
+ throw new XmlException(
+ e.getMessage() + " At: (" + e.getLineNumber() + ',' +
e.getColumnNumber() + ')', e);
} catch (SAXException e) {
throw new XmlException(e);
} catch (ParserConfigurationException e) {