[this has also been entered as bug #1808] Both Tomcat and Apache have the string '8859_1' hard-coded, either inline or as a public static final String, in several places. Although Java accepts '8859_1' as an alias for the ISO-8859-1 character set, this isn't a valid name anywhere else; the valid aliases are listed at <URL:http://www.iana.org/assignments/character-sets>. Some user-agents (I first noticed this on an older version of Lynx) are confused by this. This patch will: - Remove all references in code (not comments) to '8859_1' - In classes where this string was used, add a public static final String DEFAULT_CHAR_ENCODING if none was present (this is the most frequently used name when such a field is present) - In the src/share/org/apache/jasper tree: - add a public static final String DEFAULT_CHAR_ENCODING to Constants.java - replace all occurrences of '8859_1' in code with Constants.DEFAULT_CHAR_ENCODING, as this seems to me to be the proper way to do this in Jasper. Regards, Vince.
Index: src/share/org/apache/tomcat/util/buf/MessageBytes.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/share/org/apache/tomcat/util/buf/MessageBytes.java,v retrieving revision 1.1 diff -u -r1.1 MessageBytes.java --- src/share/org/apache/tomcat/util/buf/MessageBytes.java 2001/02/20 03:12:13 1.1 +++ src/share/org/apache/tomcat/util/buf/MessageBytes.java 2001/05/18 11:05:42 @@ -74,7 +74,7 @@ * @author Costin Manolache */ public final class MessageBytes implements Cloneable, Serializable { - public static final String DEFAULT_CHAR_ENCODING="8859_1"; + public static final String DEFAULT_CHAR_ENCODING="iso-8859-1"; // primary type ( whatever is set as original value ) private int type = T_NULL; Index: src/share/org/apache/tomcat/util/http/Parameters.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/share/org/apache/tomcat/util/http/Parameters.java,v retrieving revision 1.11 diff -u -r1.11 Parameters.java --- src/share/org/apache/tomcat/util/http/Parameters.java 2001/02/20 03:14:11 1.11 +++ src/share/org/apache/tomcat/util/http/Parameters.java 2001/05/18 11:06:04 @@ -83,7 +83,8 @@ MimeHeaders headers; public static final int INITIAL_SIZE=4; - + public static final String DEFAULT_CHAR_ENCODING = "iso-8859-1"; + // Garbage-less parameter merging. // In a sub-request with parameters, the new parameters // will be stored in child. When a getParameter happens, @@ -265,7 +266,7 @@ try { String postedBody = new String(data, 0, data.length, - "8859_1"); + DEFAULT_CHAR_ENCODING); // XXX encoding !!! 
processFormData( postedBody ); Index: src/share/org/apache/tomcat/modules/server/Ajp13.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/share/org/apache/tomcat/modules/server/Ajp13.java,v retrieving revision 1.17 diff -u -r1.17 Ajp13.java --- src/share/org/apache/tomcat/modules/server/Ajp13.java 2001/02/28 19:41:23 1.17 +++ src/share/org/apache/tomcat/modules/server/Ajp13.java 2001/05/18 11:06:26 @@ -913,7 +913,7 @@ return (getByte() == (byte) 1); } - public static final String DEFAULT_CHAR_ENCODING = "8859_1"; + public static final String DEFAULT_CHAR_ENCODING = "iso-8859-1"; public void getMessageBytes( MessageBytes mb ) { int length = getInt(); Index: src/share/org/apache/tomcat/core/OutputBuffer.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/share/org/apache/tomcat/core/OutputBuffer.java,v retrieving revision 1.13 diff -u -r1.13 OutputBuffer.java --- src/share/org/apache/tomcat/core/OutputBuffer.java 2001/02/27 02:45:02 1.13 +++ src/share/org/apache/tomcat/core/OutputBuffer.java 2001/05/18 11:06:48 @@ -80,6 +80,7 @@ int defaultBufferSize = DEFAULT_BUFFER_SIZE; int defaultCharBufferSize = DEFAULT_BUFFER_SIZE / 2 ; + public static final String DEFAULT_CHAR_ENCODING = "iso-8859-1"; // The buffer can be used for byte[] and char[] writing // ( this is needed to support ServletOutputStream and for // efficient implementations of templating systems ) @@ -426,7 +427,7 @@ if( resp!=null ) enc = resp.getCharacterEncoding(); gotEnc=true; - if(enc==null) enc="8859_1"; + if(enc==null) enc=DEFAULT_CHAR_ENCODING; conv=(WriteConvertor)encoders.get(enc); if(conv==null) { IntermediateOutputStream ios=new IntermediateOutputStream(this); @@ -434,11 +435,11 @@ conv=new WriteConvertor(ios,enc); encoders.put(enc, conv); } catch(UnsupportedEncodingException ex ) { - conv=(WriteConvertor)encoders.get("8859_1"); + 
conv=(WriteConvertor)encoders.get(DEFAULT_CHAR_ENCODING); if(conv==null) { try { - conv=new WriteConvertor(ios, "8859_1"); - encoders.put("8859_1", conv); + conv=new WriteConvertor(ios, DEFAULT_CHAR_ENCODING); + encoders.put(DEFAULT_CHAR_ENCODING, conv); } catch( UnsupportedEncodingException e ) {} } } Index: src/share/org/apache/tomcat/core/Response.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/share/org/apache/tomcat/core/Response.java,v retrieving revision 1.50 diff -u -r1.50 Response.java --- src/share/org/apache/tomcat/core/Response.java 2001/02/26 04:34:16 1.50 +++ src/share/org/apache/tomcat/core/Response.java 2001/05/18 11:07:10 @@ -80,7 +80,7 @@ public static final String DEFAULT_CONTENT_TYPE = "text/plain"; - public static final String DEFAULT_CHAR_ENCODING = "8859_1"; + public static final String DEFAULT_CHAR_ENCODING = "iso-8859-1"; public static final String LOCALE_DEFAULT="en"; Index: src/share/org/apache/jasper/compiler/Compiler.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/share/org/apache/jasper/compiler/Compiler.java,v retrieving revision 1.23 diff -u -r1.23 Compiler.java --- src/share/org/apache/jasper/compiler/Compiler.java 2001/03/02 04:51:30 1.23 +++ src/share/org/apache/jasper/compiler/Compiler.java 2001/05/18 11:07:33 @@ -143,7 +143,7 @@ // - compiling the generated servlets (pass -encoding to javac). // XXX - There are really three encodings of interest. - String jspEncoding = "8859_1"; // default per JSP spec + String jspEncoding = Constants.DEFAULT_CHAR_ENCODING; // We try UTF8 by default. If it fails, we use the java encoding // specified for JspServlet init parameter "javaEncoding". 
Index: src/share/org/apache/tomcat/modules/server/Http10.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/share/org/apache/tomcat/modules/server/Http10.java,v retrieving revision 1.9 diff -u -r1.9 Http10.java --- src/share/org/apache/tomcat/modules/server/Http10.java 2001/03/02 04:49:24 1.9 +++ src/share/org/apache/tomcat/modules/server/Http10.java 2001/05/18 11:07:57 @@ -79,7 +79,7 @@ int bufSize=2048; // default int off=0; int count=0; - public static final String DEFAULT_CHARACTER_ENCODING = "8859_1"; + public static final String DEFAULT_CHARACTER_ENCODING = "iso-8859-1"; protected static final int DEFAULT_HEAD_BUFFER_SIZE = 1024; protected byte[] oBuffer = new byte[DEFAULT_HEAD_BUFFER_SIZE]; Index: src/facade22/org/apache/tomcat/facade/HttpServletRequestFacade.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/facade22/org/apache/tomcat/facade/HttpServletRequestFacade.java,v retrieving revision 1.23 diff -u -r1.23 HttpServletRequestFacade.java --- src/facade22/org/apache/tomcat/facade/HttpServletRequestFacade.java 2001/05/15 09:50:37 1.23 +++ src/facade22/org/apache/tomcat/facade/HttpServletRequestFacade.java 2001/05/18 +11:08:19 @@ -90,6 +90,9 @@ private Request request; + public static final String DEFAULT_CHAR_ENCODING = "iso-8859-1"; + // that's the default in HTTP and servlet spec + HttpSessionFacade sessionFacade; ServletInputStreamFacade isFacade=new ServletInputStreamFacade(); boolean isFacadeInitialized=false; @@ -332,7 +335,7 @@ // XXX provide recycleable objects String encoding = request.getCharacterEncoding(); if (encoding == null) { - encoding = "8859_1"; // that's the default in HTTP and servlet spec + encoding = DEFAULT_CHAR_ENCODING; } InputStreamReader r = Index: src/share/org/apache/jasper/compiler/JspParseEventListener.java =================================================================== RCS file: 
/home/cvspublic/jakarta-tomcat/src/share/org/apache/jasper/compiler/JspParseEventListener.java,v retrieving revision 1.29 diff -u -r1.29 JspParseEventListener.java --- src/share/org/apache/jasper/compiler/JspParseEventListener.java 2001/05/15 18:53:37 1.29 +++ src/share/org/apache/jasper/compiler/JspParseEventListener.java 2001/05/18 +11:08:42 @@ -327,7 +327,7 @@ else writer.println("response.setContentType(\"" + servletContentType + - ";charset=8859_1\");"); + ";charset=" + Constants.DEFAULT_CHAR_ENCODING + "\");"); writer.println("pageContext = _jspxFactory.getPageContext(this, request, response,\n" + "\t\t\t" + writer.quoteString(error) + ", " Index: src/share/org/apache/jasper/compiler/JspReader.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/share/org/apache/jasper/compiler/JspReader.java,v retrieving revision 1.23 diff -u -r1.23 JspReader.java --- src/share/org/apache/jasper/compiler/JspReader.java 2001/03/02 04:51:33 1.23 +++ src/share/org/apache/jasper/compiler/JspReader.java 2001/05/18 11:09:10 @@ -279,7 +279,7 @@ { this.context = ctx; this.encoding = encoding; - if (this.encoding == null) this.encoding = "8859_1"; + if (this.encoding == null) this.encoding = Constants.DEFAULT_CHAR_ENCODING; pushFile(file, encoding); } Index: src/share/org/apache/jasper/Constants.java =================================================================== RCS file: /home/cvspublic/jakarta-tomcat/src/share/org/apache/jasper/Constants.java,v retrieving revision 1.21 diff -u -r1.21 Constants.java --- src/share/org/apache/jasper/Constants.java 2001/04/28 21:13:36 1.21 +++ src/share/org/apache/jasper/Constants.java 2001/05/18 11:09:34 @@ -93,6 +93,11 @@ public static final String SERVLET_CONTENT_TYPE = "text/html"; /** + * Default character encoding. + */ + public static final String DEFAULT_CHAR_ENCODING = "iso-8859-1"; + + /** * These classes/packages are automatically imported by the * generated code. *