Author: skitching Date: Sun Feb 6 03:33:22 2005 New Revision: 151579 URL: http://svn.apache.org/viewcvs?view=rev&rev=151579 Log: * setKnownEntities/registerKnownEntities: a target URL of "" now means resolve the entity to an empty InputSource. * added methods setIgnoreExternalDTD/getIgnoreExternalDTD
Modified: jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java Modified: jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java URL: http://svn.apache.org/viewcvs/jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java?view=diff&r1=151578&r2=151579 ============================================================================== --- jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java (original) +++ jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java Sun Feb 6 03:33:22 2005 @@ -380,6 +380,10 @@ * default Entity Resolver when resolving entities in the input xml * (including the DTD or schema specified with the DOCTYPE). * <p> + * If the value in a map entry (ie the "URI") is an empty string, then + * when the parser asks for the entity to be resolved, an empty InputSource + * will be returned, effectively ignoring the entity. + * <p> * See {@link #getKnownEntities}, and {@link #setEntityResolver}. */ public void setKnownEntities(Map knownEntities) { @@ -398,6 +402,10 @@ * done for the input document's DTD, so that the DTD can be retrieved * from a local file.</p> * + * <p>If the value in a map entry (ie the "URI") is an empty string, then + * when the parser asks for the entity to be resolved, an empty InputSource + * will be returned, effectively ignoring the entity.</p> + * * <p>This implementation provides only basic functionality. If more * sophisticated features are required,using {@link #setEntityResolver} to * set a custom resolver is recommended. 
Note in particular that if the @@ -429,6 +437,27 @@ return saxHandler.getKnownEntities(); } + /** + * Specify whether an external DTD should be ignored, ie treated as if + * it were an empty file. This can be dangerous; DTDs can potentially + * contain definitions for default attribute values and entities that + * affect the meaning of the xml document, so skipping them can cause + * incorrect output. However in many cases it is known that the DTD + * does no such thing, so processing of it can be suppressed. + * <p> + * This flag defaults to false (ie external dtds are read during the parse). + */ + public void setIgnoreExternalDTD(boolean state) { + saxHandler.setIgnoreExternalDTD(state); + } + + /** + * See setIgnoreExternalDTD. + */ + public boolean getIgnoreExternalDTD() { + return saxHandler.getIgnoreExternalDTD(); + } + // ------------------------------------------------------- Public Methods /** Modified: jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java URL: http://svn.apache.org/viewcvs/jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java?view=diff&r1=151578&r2=151579 ============================================================================== --- jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java (original) +++ jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java Sun Feb 6 03:33:22 2005 @@ -23,6 +23,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.Reader; +import java.io.StringReader; + import java.lang.reflect.InvocationTargetException; import java.util.EmptyStackException; import java.util.HashMap; @@ -126,6 +128,13 @@ private Locator locator = null; /** + * A count of the number of entities resolved. Currently, we only + * care whether this is zero or one, so a boolean could do as well. 
+ * However it seems likely that a count could be useful at some time. + */ + private int numEntitiesResolved = 0; + + /** * A map of known external entities that input xml documents may refer to. * via public or system IDs. The keys of the map entries are public or * system IDs, and the values are URLs (typically local files) pointing @@ -139,6 +148,11 @@ * See setAllowUnknownExternalEntities. */ private boolean allowUnknownExternalEntities = false; + + /** + * See setIgnoreExternalDTD. + */ + private boolean ignoreExternalDTD = false; /** * An object which contains state information that evolves @@ -348,6 +362,7 @@ /** * Get the system identifier of the DTD associated with the document * currently being parsed, or most recently parsed. + * * <p> * If the input document has no DOCTYPE declaration, then null will * be returned. @@ -356,6 +371,10 @@ * the org.xml.sax.ext.LexicalHandler interface. If the parser does not * provide callbacks via this interface, then no system id information * will be available (null will be returned). + * <p> + * Note also that the SystemId value returned is exactly as it was + * defined in the DOCTYPE tag; relative URLs are NOT resolved relative + * to the base of the current document. */ public String getDTDSystemId() { return this.dtdSystemId; @@ -576,6 +595,10 @@ * Specifies a map of (publicId->URI) pairings that will be used when * resolving entities in the input xml (including the DTD specified with * DOCTYPE, or schema specified with xsi:schemaLocation). + * <p> + * If the value in a map entry (ie the "URI") is an empty string, then + * when the parser asks for the entity to be resolved, an empty InputSource + * will be returned, effectively ignoring the entity. 
*/ public void setKnownEntities(Map knownEntities) { this.knownEntities = knownEntities; @@ -598,6 +621,10 @@ * done for the input document's DTD, so that the DTD can be retrieved * from a local file.</p> * + * <p>If the value in a map entry (ie the "URI") is an empty string, then + * when the parser asks for the entity to be resolved, an empty InputSource + * will be returned, effectively ignoring the entity.</p> + * * <p>This implementation provides only basic functionality. If more * sophisticated features are required,using {@link #setEntityResolver} to * set a custom resolver is recommended. Note in particular that if the @@ -646,6 +673,27 @@ } /** + * Specify whether an external DTD should be ignored, ie treated as if + * it were an empty file. This can be dangerous; DTDs can potentially + * contain definitions for default attribute values and entities that + * affect the meaning of the xml document, so skipping them can cause + * incorrect output. However in many cases it is known that the DTD + * does no such thing, so processing of it can be suppressed. + * <p> + * This flag defaults to false (ie external dtds are read during the parse). + */ + public void setIgnoreExternalDTD(boolean state) { + ignoreExternalDTD = state; + } + + /** + * See setIgnoreExternalDTD. + */ + public boolean getIgnoreExternalDTD() { + return ignoreExternalDTD; + } + + /** * Add a (pattern, action) pair to the RuleManager instance associated * with this saxHandler. This is equivalent to * <pre> @@ -862,6 +910,7 @@ saxLog.debug("startDocument()"); } + numEntitiesResolved = 0; dtdPublicId = null; dtdSystemId = null; @@ -1352,6 +1401,31 @@ } } + // Keep count of the number of entities resolved. Currently, we only + // care whether this is zero or one, so a boolean could do as well. + // However it seems likely that a count could be useful at some time. + ++numEntitiesResolved; + + // Is this the DTD? 
If there *is* a DTD (ie one was reported to the + // lexical handler) then it is presumed here that it will be the first + // entity resolved. + // + // Note that we can't just check whether this systemId is the same + // as the dtdSystemId, because the systemId parameter here has been + // expanded to an absolute ref, while the one passed to the + // LexicalHandler is in its original (possibly relative) form. + // + // It would be great to be able to use the EntityResolver2 interface + // which provides both the original and system ids, but that is + // probably not supported widely enough yet. + if ((numEntitiesResolved == 1) && (dtdSystemId != null)) { + if (ignoreExternalDTD) { + // this entity is the DTD, and the user wants to completely + // ignore it, so we return an "empty file". + return new InputSource(new StringReader("")); + } + } + // Has this public identifier been registered? String entityURL = null; if (publicId != null) { @@ -1398,15 +1472,18 @@ + " registered as a known entity, and systemId is null."); } - // Return an input source to our alternative URL - if (log.isDebugEnabled()) { - log.debug(" Resolving entity to '" + entityURL + "'"); - } - - try { + if (entityURL.length() == 0) { + // special case: when the user has mapped an empty to a URL being + // the empty string, we return an empty InputSource to the parser, + // effectively ignoring the entity. + return new InputSource(new StringReader("")); + } else { + // Return an input source to our alternative URL + if (log.isDebugEnabled()) { + log.debug(" Resolving entity to '" + entityURL + "'"); + } + return new InputSource(entityURL); - } catch (Exception e) { - throw createSAXException(e); } } @@ -1544,6 +1621,8 @@ * Invoked when the DOCTYPE tag is found in the input xml. The public * and system ids present in that declaration are stored and can be * retrieved later via the getDTDPublicId and getDTDSystemId methods. + * <p> + * This method is always preceded by startDocument. 
*/ public void startDTD(String name, String publicId, String systemId) { dtdPublicId = publicId; @@ -1551,7 +1630,9 @@ } /** - * See {@link #startDTD}. + * See {@link #startDTD}. + * <p> + * This method always precedes the first startElement. */ public void endDTD() { ; // ignore --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]