luehe 2003/02/12 15:44:24 Modified: jasper2/src/share/org/apache/jasper/compiler Compiler.java PageInfo.java ParserController.java Validator.java jasper2/src/share/org/apache/jasper/xmlparser XMLEncodingDetector.java XercesEncodingDetector.java Log: Last round of encoding-determination changes, to comply with the spec. Revision Changes Path 1.52 +1 -1 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Compiler.java Index: Compiler.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Compiler.java,v retrieving revision 1.51 retrieving revision 1.52 diff -u -r1.51 -r1.52 --- Compiler.java 12 Feb 2003 02:22:51 -0000 1.51 +++ Compiler.java 12 Feb 2003 23:44:22 -0000 1.52 @@ -210,7 +210,7 @@ pageInfo.setELIgnoredSpecified(true); } pageInfo.setIsXml(JspUtil.booleanValue(jspProperty.isXml())); - pageInfo.setPageEncoding(jspProperty.getPageEncoding()); + pageInfo.setConfigEncoding(jspProperty.getPageEncoding()); pageInfo.setELIgnored(JspUtil.booleanValue(jspProperty.isELIgnored())); pageInfo.setScriptingInvalid(JspUtil.booleanValue(jspProperty.isScriptingInvalid())); if (jspProperty.getIncludePrelude() != null) { 1.19 +30 -3 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java Index: PageInfo.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java,v retrieving revision 1.18 retrieving revision 1.19 diff -u -r1.18 -r1.19 --- PageInfo.java 12 Feb 2003 02:22:51 -0000 1.18 +++ PageInfo.java 12 Feb 2003 23:44:22 -0000 1.19 @@ -89,6 +89,17 @@ private String errorPage = null; private String pageEncoding = null; + // Encoding specified in JSP config element + private String configEncoding; + + /* + * Indicates whether an encoding has been explicitly specified in the + * page's XML prolog (only used for pages in XML syntax). + * This information is used to decide whether a translation error must + * be reported for encoding conflicts. + */ + private boolean isEncodingSpecifiedInProlog; + private int maxTagNesting = 0; private boolean scriptless = false; private boolean scriptingInvalid = false; @@ -228,6 +239,22 @@ public String getPageEncoding() { return pageEncoding; + } + + public void setIsEncodingSpecifiedInProlog(boolean isSpecified) { + this.isEncodingSpecifiedInProlog = isSpecified; + } + + public boolean isEncodingSpecifiedInProlog() { + return this.isEncodingSpecifiedInProlog; + } + + public void setConfigEncoding(String enc) { + this.configEncoding = enc; + } + + public String getConfigEncoding() { + return this.configEncoding; } public int getMaxTagNesting() { 1.32 +35 -27 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java Index: ParserController.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java,v retrieving revision 1.31 retrieving revision 1.32 diff -u -r1.31 -r1.32 --- ParserController.java 27 Jan 2003 23:15:33 -0000 1.31 +++ ParserController.java 12 Feb 2003 23:44:23 -0000 1.32 @@ -199,27 +199,23 @@ figureOutJspDocument(absFileName, jarFile); if (isTopFile) { - if (isXml) { - // Make sure the encoding determined from the XML prolog - // matches that in the JSP config element, if present. - // Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as identical. - String jspConfigPageEnc = pageInfo.getPageEncoding(); + if (isXml && pageInfo.isEncodingSpecifiedInProlog()) { + /* + * Make sure the encoding explicitly specified in the XML + * prolog (if any) matches that in the JSP config element + * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as + * identical. + */ + String jspConfigPageEnc = pageInfo.getConfigEncoding(); if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc) - && (!jspConfigPageEnc.startsWith("UTF-16") + && (!jspConfigPageEnc.startsWith("UTF-16") || !sourceEnc.startsWith("UTF-16"))) { err.jspError("jsp.error.prolog_config_encoding_mismatch", sourceEnc, jspConfigPageEnc); } - // override the encoding that may have been set from JSP config - // info (in Compiler.generateJava()), since that applies to - // standard syntax only - pageInfo.setPageEncoding(sourceEnc); - } else { - if (pageInfo.getPageEncoding() == null) { - pageInfo.setPageEncoding(sourceEnc); - } } + pageInfo.setPageEncoding(sourceEnc); pageInfo.setIsXml(isXml); isTopFile = false; } else { @@ -305,12 +301,20 @@ private void figureOutJspDocument(String fname, JarFile jarFile) throws JasperException, IOException { - // 'true' if the syntax of the page (XML or standard) is identified by - // external information: either via a JSP configuration element or - // the ".jspx" suffix + /* + * 'true' if the syntax of the page (XML or standard) is identified by + * external information: either via a JSP configuration element or + * the ".jspx" suffix + */ boolean isExternal = false; isXml = false; + /* + * Indicates whether we need to revert from temporary usage of + * "ISO-8859-1" back to "UTF-8" + */ + boolean revert = false; + if (pageInfo.isXmlSpecified()) { // If <is-xml> is specified in a <jsp-property-group>, it is used. isXml = pageInfo.isXml(); @@ -321,8 +325,8 @@ } if (isExternal && !isXml) { - // JSP syntax - if (pageInfo.getPageEncoding() != null) { + // JSP (standard) syntax + if (pageInfo.getConfigEncoding() != null) { // Encoding specified in jsp-config (used by standard syntax // only) sourceEnc = pageInfo.getPageEncoding(); @@ -332,18 +336,19 @@ sourceEnc = "ISO-8859-1"; } } else { - // XML syntax or unknown, autodetect encoding ... + // XML syntax or unknown, (auto)detect encoding ... Object[] ret = XMLEncodingDetector.getEncoding(fname, jarFile, ctxt, err); sourceEnc = (String) ret[0]; - boolean isFallback = ((Boolean) ret[1]).booleanValue(); - if (isFallback) { + if (((Boolean) ret[1]).booleanValue()) { + pageInfo.setIsEncodingSpecifiedInProlog(true); + } + + if (!isXml && sourceEnc.equals("UTF-8")) { /* - * Page does not have any XML prolog, or contains an XML - * prolog that is being used as template text (in standard - * syntax). This means that the page's encoding cannot be - * determined from the 'encoding' attribute of an XML prolog, - * or autodetected from an XML prolog. + * We don't know if we're dealing with XML or standard syntax. + * Therefore, we need to check and see if the page contains + * a <jsp:root> element. * * We need to be careful, because the page may be encoded in * ISO-8859-1 (or something entirely different), and may @@ -357,6 +362,7 @@ * and ISO-8859-1 are extensions of ASCII). */ sourceEnc = "ISO-8859-1"; + revert = true; } } @@ -389,6 +395,8 @@ Mark mark = jspReader.skipUntil(JSP_ROOT_TAG); if (mark != null) { isXml = true; + if (revert) + sourceEnc = "UTF-8"; return; } else { isXml = false; 1.75 +36 -30 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java Index: Validator.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java,v retrieving revision 1.74 retrieving revision 1.75 diff -u -r1.74 -r1.75 --- Validator.java 10 Feb 2003 17:07:08 -0000 1.74 +++ Validator.java 12 Feb 2003 23:44:23 -0000 1.75 @@ -252,34 +252,10 @@ err.jspError(n, "jsp.error.page.multiple.pageencoding"); pageEncodingSeen = true; /* - * It is a translation-time error to name different page - * character encodings in two or more of the following: - * the XML prolog of a JSP page, the pageEncoding - * attribute of the page directive of the JSP page, and in - * a JSP configuration element (whose URL pattern matches - * the page). - * - * At this point, we've already verified (in - * ParserController.parse()) that the page character - * encodings specified in a JSP config element and XML - * prolog match. - * - * Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as identical. + * Report any encoding conflict, treating "UTF-16", + * "UTF-16BE", and "UTF-16LE" as identical. */ - String compareEnc = pageInfo.getPageEncoding(); - if (!value.equals(compareEnc) - && (!value.startsWith("UTF-16") - || !compareEnc.startsWith("UTF-16"))) { - if (pageInfo.isXml()) { - err.jspError(n, - "jsp.error.prolog_pagedir_encoding_mismatch", - compareEnc, value); - } else { - err.jspError(n, - "jsp.error.config_pagedir_encoding_mismatch", - compareEnc, value); - } - } + compareEncodings(value, n, pageInfo); } } @@ -344,6 +320,36 @@ // Do nothing, since this variable directive has already been // validated by TagFileProcessor when it created a TagInfo object // from the tag file in which the directive appeared + } + + /* + * Compares the encoding specified in the 'pageEncoding' attribute of + * the page directive with the encoding explicitly specified in the + * XML prolog (only for XML syntax) and the encoding specified in + * the JSP config element whose URL pattern matches the page, and + * throws an error in case of a mismatch. + */ + private void compareEncodings(String pageDirEnc, Node n, + PageInfo pageInfo) + throws JasperException { + + String configEnc = pageInfo.getConfigEncoding(); + if (configEnc != null && !pageDirEnc.equals(configEnc) + && (!pageDirEnc.startsWith("UTF-16") + || !configEnc.startsWith("UTF-16"))) { + err.jspError(n, "jsp.error.config_pagedir_encoding_mismatch", + configEnc, pageDirEnc); + } + + if (pageInfo.isXml() && pageInfo.isEncodingSpecifiedInProlog()) { + String pageEnc = pageInfo.getPageEncoding(); + if (!pageDirEnc.equals(pageEnc) + && (!pageDirEnc.startsWith("UTF-16") + || !pageEnc.startsWith("UTF-16"))) { + err.jspError(n, "jsp.error.prolog_pagedir_encoding_mismatch", + pageEnc, pageDirEnc); + } + } } } 1.5 +1 -1 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java Index: XMLEncodingDetector.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- XMLEncodingDetector.java 27 Jan 2003 18:10:48 -0000 1.4 +++ XMLEncodingDetector.java 12 Feb 2003 23:44:23 -0000 1.5 @@ -104,7 +104,7 @@ ErrorDispatcher err) throws IOException, JasperException { - Object result[] = new Object[] { "UTF8", new Boolean(true) }; + Object result[] = new Object[] { "UTF8", new Boolean(false) }; return result; } } 1.5 +7 -11 jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XercesEncodingDetector.java Index: XercesEncodingDetector.java =================================================================== RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XercesEncodingDetector.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- XercesEncodingDetector.java 27 Jan 2003 18:10:48 -0000 1.4 +++ XercesEncodingDetector.java 12 Feb 2003 23:44:23 -0000 1.5 @@ -80,7 +80,7 @@ private InputStream stream; private String encoding; - private boolean isFallback; + private boolean isEncodingSetInProlog; private Boolean isBigEndian; private Reader reader; @@ -134,10 +134,10 @@ * @param err The error dispatcher * * @return Two-element array, where the first element (of type - * java.lang.String) contains the name of the (auto)detected encoding, - * and the second element specifies whether the default encoding - * (UTF-8) is being used as a fallback (because no encoding could be - * detected). + * java.lang.String) contains the name of the (auto)detected encoding, and + * the second element (of type java.lang.Boolean) specifies whether the + * encoding was specified using the 'encoding' attribute of an XML prolog + * (TRUE) or autodetected (FALSE). */ public Object[] getEncoding(InputStream in, ErrorDispatcher err) throws IOException, JasperException @@ -149,7 +149,7 @@ detector.scanXMLDecl(); return new Object[] { detector.encoding, - new Boolean(detector.isFallback) }; + new Boolean(detector.isEncodingSetInProlog) }; } public Object[] getEncodingMethod(String fname, JarFile jarFile, @@ -319,7 +319,6 @@ private Object[] getEncodingName(byte[] b4, int count) { if (count < 2) { - isFallback = true; return new Object[]{"UTF-8", null}; } @@ -338,7 +337,6 @@ // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { - isFallback = true; return new Object [] {"UTF-8", null}; } @@ -351,7 +349,6 @@ // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 4) { - isFallback = true; return new Object [] {"UTF-8", null}; } @@ -393,7 +390,6 @@ } // default encoding - isFallback = true; return new Object [] {"UTF-8", null}; } @@ -1306,7 +1302,7 @@ // set encoding on reader if (encodingPseudoAttr != null) { - isFallback = false; + isEncodingSetInProlog = true; encoding = encodingPseudoAttr; } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]