luehe       2003/02/12 15:44:24

  Modified:    jasper2/src/share/org/apache/jasper/compiler Compiler.java
                        PageInfo.java ParserController.java Validator.java
               jasper2/src/share/org/apache/jasper/xmlparser
                        XMLEncodingDetector.java
                        XercesEncodingDetector.java
  Log:
  Last round of encoding-determination changes, to comply with the spec.
  
  Revision  Changes    Path
  1.52      +1 -1      jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Compiler.java
  
  Index: Compiler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Compiler.java,v
  retrieving revision 1.51
  retrieving revision 1.52
  diff -u -r1.51 -r1.52
  --- Compiler.java     12 Feb 2003 02:22:51 -0000      1.51
  +++ Compiler.java     12 Feb 2003 23:44:22 -0000      1.52
  @@ -210,7 +210,7 @@
            pageInfo.setELIgnoredSpecified(true);
        }
        pageInfo.setIsXml(JspUtil.booleanValue(jspProperty.isXml()));
  -     pageInfo.setPageEncoding(jspProperty.getPageEncoding());
  +     pageInfo.setConfigEncoding(jspProperty.getPageEncoding());
        pageInfo.setELIgnored(JspUtil.booleanValue(jspProperty.isELIgnored()));
        pageInfo.setScriptingInvalid(JspUtil.booleanValue(jspProperty.isScriptingInvalid()));
        if (jspProperty.getIncludePrelude() != null) {
  
  
  
  1.19      +30 -3     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java
  
  Index: PageInfo.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/PageInfo.java,v
  retrieving revision 1.18
  retrieving revision 1.19
  diff -u -r1.18 -r1.19
  --- PageInfo.java     12 Feb 2003 02:22:51 -0000      1.18
  +++ PageInfo.java     12 Feb 2003 23:44:22 -0000      1.19
  @@ -89,6 +89,17 @@
       private String errorPage = null;
       private String pageEncoding = null;
   
  +    // Encoding specified in JSP config element
  +    private String configEncoding;
  +
  +    /*
  +     * Indicates whether an encoding has been explicitly specified in the
  +     * page's XML prolog (only used for pages in XML syntax).
  +     * This information is used to decide whether a translation error must
  +     * be reported for encoding conflicts.
  +     */
  +    private boolean isEncodingSpecifiedInProlog;
  +
       private int maxTagNesting = 0;
       private boolean scriptless = false;
       private boolean scriptingInvalid = false;
  @@ -228,6 +239,22 @@
   
       public String getPageEncoding() {
        return pageEncoding;
  +    }
  +
  +    public void setIsEncodingSpecifiedInProlog(boolean isSpecified) {
  +     this.isEncodingSpecifiedInProlog = isSpecified;
  +    }
  +
  +    public boolean isEncodingSpecifiedInProlog() {
  +     return this.isEncodingSpecifiedInProlog;
  +    }
  +
  +    public void setConfigEncoding(String enc) {
  +     this.configEncoding = enc;
  +    }
  +
  +    public String getConfigEncoding() {
  +     return this.configEncoding;
       }
   
       public int getMaxTagNesting() {
  
  
  
  1.32      +35 -27    jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java
  
  Index: ParserController.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/ParserController.java,v
  retrieving revision 1.31
  retrieving revision 1.32
  diff -u -r1.31 -r1.32
  --- ParserController.java     27 Jan 2003 23:15:33 -0000      1.31
  +++ ParserController.java     12 Feb 2003 23:44:23 -0000      1.32
  @@ -199,27 +199,23 @@
        figureOutJspDocument(absFileName, jarFile);
   
        if (isTopFile) {
  -         if (isXml) {
  -             // Make sure the encoding determined from the XML prolog
  -             // matches that in the JSP config element, if present.
  -             // Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as identical.
  -             String jspConfigPageEnc = pageInfo.getPageEncoding();
  +         if (isXml && pageInfo.isEncodingSpecifiedInProlog()) {
  +             /*
  +              * Make sure the encoding explicitly specified in the XML
  +              * prolog (if any) matches that in the JSP config element
  +              * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as
  +              * identical.
  +              */
  +             String jspConfigPageEnc = pageInfo.getConfigEncoding();
                if (jspConfigPageEnc != null
                        && !jspConfigPageEnc.equals(sourceEnc)
  -                     && (!jspConfigPageEnc.startsWith("UTF-16")
  +                     && (!jspConfigPageEnc.startsWith("UTF-16")
                            || !sourceEnc.startsWith("UTF-16"))) {
                    err.jspError("jsp.error.prolog_config_encoding_mismatch",
                                 sourceEnc, jspConfigPageEnc);
                }
  -             // override the encoding that may have been set from JSP config
  -             // info (in Compiler.generateJava()), since that applies to
  -             // standard syntax only
  -             pageInfo.setPageEncoding(sourceEnc);
  -         } else {
  -             if (pageInfo.getPageEncoding() == null) {
  -                 pageInfo.setPageEncoding(sourceEnc);
  -             }
            }
  +         pageInfo.setPageEncoding(sourceEnc);
            pageInfo.setIsXml(isXml);
            isTopFile = false;
        } else {
  @@ -305,12 +301,20 @@
       private void figureOutJspDocument(String fname, JarFile jarFile)
                throws JasperException, IOException {
   
  -     // 'true' if the syntax of the page (XML or standard) is identified by
  -     // external information: either via a JSP configuration element or
  -     // the ".jspx" suffix
  +     /*
  +      * 'true' if the syntax of the page (XML or standard) is identified by
  +      * external information: either via a JSP configuration element or
  +      * the ".jspx" suffix
  +      */
        boolean isExternal = false;
        isXml = false;
   
  +     /*
  +      * Indicates whether we need to revert from temporary usage of
  +      * "ISO-8859-1" back to "UTF-8"
  +      */
  +     boolean revert = false;
  +
        if (pageInfo.isXmlSpecified()) {
            // If <is-xml> is specified in a <jsp-property-group>, it is used.
            isXml = pageInfo.isXml();
  @@ -321,8 +325,8 @@
        }
        
        if (isExternal && !isXml) {
  -         // JSP syntax
  -         if (pageInfo.getPageEncoding() != null) {
  +         // JSP (standard) syntax
  +         if (pageInfo.getConfigEncoding() != null) {
                // Encoding specified in jsp-config (used by standard syntax
                // only)
                sourceEnc = pageInfo.getPageEncoding();
  @@ -332,18 +336,19 @@
                sourceEnc = "ISO-8859-1";
            }
        } else {
  -         // XML syntax or unknown, autodetect encoding ...
  +         // XML syntax or unknown, (auto)detect encoding ...
            Object[] ret = XMLEncodingDetector.getEncoding(fname, jarFile,
                                                           ctxt, err);
            sourceEnc = (String) ret[0];
  -         boolean isFallback = ((Boolean) ret[1]).booleanValue();
  -         if (isFallback) {
  +         if (((Boolean) ret[1]).booleanValue()) {
  +             pageInfo.setIsEncodingSpecifiedInProlog(true);
  +         }
  +
  +         if (!isXml && sourceEnc.equals("UTF-8")) {
                /*
  -              * Page does not have any XML prolog, or contains an XML
  -              * prolog that is being used as template text (in standard
  -              * syntax). This means that the page's encoding cannot be
  -              * determined from the 'encoding' attribute of an XML prolog,
  -              * or autodetected from an XML prolog.
  +              * We don't know if we're dealing with XML or standard syntax.
  +              * Therefore, we need to check and see if the page contains
  +              * a <jsp:root> element.
                 *
                 * We need to be careful, because the page may be encoded in
                 * ISO-8859-1 (or something entirely different), and may
  @@ -357,6 +362,7 @@
                 * and ISO-8859-1 are extensions of ASCII).
                 */
                sourceEnc = "ISO-8859-1";
  +             revert = true;
            }
        }
   
  @@ -389,6 +395,8 @@
            Mark mark = jspReader.skipUntil(JSP_ROOT_TAG);
            if (mark != null) {
                isXml = true;
  +             if (revert) 
  +                 sourceEnc = "UTF-8";
                return;
            } else {
                isXml = false;
  
  
  
  1.75      +36 -30    jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java
  
  Index: Validator.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/compiler/Validator.java,v
  retrieving revision 1.74
  retrieving revision 1.75
  diff -u -r1.74 -r1.75
  --- Validator.java    10 Feb 2003 17:07:08 -0000      1.74
  +++ Validator.java    12 Feb 2003 23:44:23 -0000      1.75
  @@ -252,34 +252,10 @@
                        err.jspError(n, "jsp.error.page.multiple.pageencoding");
                    pageEncodingSeen = true;
                    /*
  -                  * It is a translation-time error to name different page
  -                  * character encodings in two or more of the following:
  -                  * the XML prolog of a JSP page, the pageEncoding
  -                  * attribute of the page directive of the JSP page, and in
  -                  * a JSP configuration element (whose URL pattern matches
  -                  * the page).
  -                  *
  -                  * At this point, we've already verified (in 
  -                  * ParserController.parse()) that the page character
  -                  * encodings specified in a JSP config element and XML
  -                  * prolog match.
  -                  *
  -                  * Treat "UTF-16", "UTF-16BE", and "UTF-16LE" as identical.
  +                  * Report any encoding conflict, treating "UTF-16",
  +                  * "UTF-16BE", and "UTF-16LE" as identical.
                     */
  -                 String compareEnc = pageInfo.getPageEncoding();
  -                 if (!value.equals(compareEnc) 
  -                         && (!value.startsWith("UTF-16")
  -                             || !compareEnc.startsWith("UTF-16"))) {
  -                     if (pageInfo.isXml()) {
  -                         err.jspError(n,
  -                                      "jsp.error.prolog_pagedir_encoding_mismatch",
  -                                      compareEnc, value);
  -                     } else {
  -                         err.jspError(n,
  -                                      "jsp.error.config_pagedir_encoding_mismatch",
  -                                      compareEnc, value);
  -                     }
  -                 }
  +                 compareEncodings(value, n, pageInfo);
                }
            }
   
  @@ -344,6 +320,36 @@
            // Do nothing, since this variable directive has already been
            // validated by TagFileProcessor when it created a TagInfo object
            // from the tag file in which the directive appeared
  +     }
  +
  +     /*
  +      * Compares the encoding specified in the 'pageEncoding' attribute of
  +      * the page directive with the encoding explicitly specified in the
  +      * XML prolog (only for XML syntax) and the encoding specified in
  +      * the JSP config element whose URL pattern matches the page, and 
  +      * throws an error in case of a mismatch.
  +      */
  +     private void compareEncodings(String pageDirEnc, Node n,
  +                                   PageInfo pageInfo)
  +                 throws JasperException {
  +
  +         String configEnc = pageInfo.getConfigEncoding();
  +         if (configEnc != null && !pageDirEnc.equals(configEnc) 
  +                 && (!pageDirEnc.startsWith("UTF-16")
  +                     || !configEnc.startsWith("UTF-16"))) {
  +             err.jspError(n, "jsp.error.config_pagedir_encoding_mismatch",
  +                          configEnc, pageDirEnc);
  +         }
  +
  +         if (pageInfo.isXml() && pageInfo.isEncodingSpecifiedInProlog()) {
  +             String pageEnc = pageInfo.getPageEncoding();
  +             if (!pageDirEnc.equals(pageEnc) 
  +                     && (!pageDirEnc.startsWith("UTF-16")
  +                         || !pageEnc.startsWith("UTF-16"))) {
  +                 err.jspError(n, "jsp.error.prolog_pagedir_encoding_mismatch",
  +                              pageEnc, pageDirEnc);
  +             }
  +         }
        }
       }
   
  
  
  
  1.5       +1 -1      jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java
  
  Index: XMLEncodingDetector.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XMLEncodingDetector.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XMLEncodingDetector.java  27 Jan 2003 18:10:48 -0000      1.4
  +++ XMLEncodingDetector.java  12 Feb 2003 23:44:23 -0000      1.5
  @@ -104,7 +104,7 @@
                                      ErrorDispatcher err)
        throws IOException, JasperException
       {
  -        Object result[] = new Object[] { "UTF8", new Boolean(true) };
  +        Object result[] = new Object[] { "UTF8", new Boolean(false) };
           return result;
       }
   }
  
  
  
  1.5       +7 -11     jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XercesEncodingDetector.java
  
  Index: XercesEncodingDetector.java
  ===================================================================
  RCS file: /home/cvs/jakarta-tomcat-jasper/jasper2/src/share/org/apache/jasper/xmlparser/XercesEncodingDetector.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XercesEncodingDetector.java       27 Jan 2003 18:10:48 -0000      1.4
  +++ XercesEncodingDetector.java       12 Feb 2003 23:44:23 -0000      1.5
  @@ -80,7 +80,7 @@
       
       private InputStream stream;
       private String encoding;
  -    private boolean isFallback;
  +    private boolean isEncodingSetInProlog;
       private Boolean isBigEndian;
       private Reader reader;
       
  @@ -134,10 +134,10 @@
        * @param err The error dispatcher
        *
        * @return Two-element array, where the first element (of type
  -     * java.lang.String) contains the name of the (auto)detected encoding, 
  -     * and the second element specifies whether the default encoding
  -     * (UTF-8) is being used as a fallback (because no encoding could be
  -     * detected).
  +     * java.lang.String) contains the name of the (auto)detected encoding, and
  +     * the second element (of type java.lang.Boolean) specifies whether the 
  +     * encoding was specified using the 'encoding' attribute of an XML prolog
  +     * (TRUE) or autodetected (FALSE).
        */
       public Object[] getEncoding(InputStream in, ErrorDispatcher err)
        throws IOException, JasperException
  @@ -149,7 +149,7 @@
        detector.scanXMLDecl();
        
        return new Object[] { detector.encoding,
  -                           new Boolean(detector.isFallback) };
  +                           new Boolean(detector.isEncodingSetInProlog) };
       }
   
       public Object[] getEncodingMethod(String fname, JarFile jarFile,
  @@ -319,7 +319,6 @@
       private Object[] getEncodingName(byte[] b4, int count) {
   
           if (count < 2) {
  -         isFallback = true;
               return new Object[]{"UTF-8", null};
           }
   
  @@ -338,7 +337,6 @@
           // default to UTF-8 if we don't have enough bytes to make a
           // good determination of the encoding
           if (count < 3) {
  -         isFallback = true;
               return new Object [] {"UTF-8", null};
           }
   
  @@ -351,7 +349,6 @@
           // default to UTF-8 if we don't have enough bytes to make a
           // good determination of the encoding
           if (count < 4) {
  -         isFallback = true;
               return new Object [] {"UTF-8", null};
           }
   
  @@ -393,7 +390,6 @@
           }
   
           // default encoding
  -     isFallback = true;
           return new Object [] {"UTF-8", null};
   
       }
  @@ -1306,7 +1302,7 @@
   
           // set encoding on reader
           if (encodingPseudoAttr != null) {
  -            isFallback = false;
  +            isEncodingSetInProlog = true;
            encoding = encodingPseudoAttr;
           }
       }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to