costin 01/05/27 16:14:24
Modified: src/share/org/apache/tomcat/modules/mappers
DecodeInterceptor.java
Log:
Added 2 very common charset detection schemes.
The most important is the per-session charset: all browsers will use the
same encoding for the posted data.
Another common one is using an attribute in the request uri ( or parameter -
this is not done yet ).
Both will be disabled in the default config, but I think they will be very
useful for anyone running sites with non-8859-1 charsets.
More to come, please provide feedback.
Revision Changes Path
1.2 +97 -14
jakarta-tomcat/src/share/org/apache/tomcat/modules/mappers/DecodeInterceptor.java
Index: DecodeInterceptor.java
===================================================================
RCS file:
/home/cvs/jakarta-tomcat/src/share/org/apache/tomcat/modules/mappers/DecodeInterceptor.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- DecodeInterceptor.java 2001/05/26 18:04:42 1.1
+++ DecodeInterceptor.java 2001/05/27 23:14:24 1.2
@@ -71,8 +71,16 @@
*
*/
public class DecodeInterceptor extends BaseInterceptor {
- String defaultEncoding=null;
- private int encodingInfo;
+ private String defaultEncoding=null;
+ // debug, default will be false, null, null
+ private boolean useSessionEncoding=true;
+ private String charsetAttribute="charset";
+ private String charsetURIAttribute=";charset=";
+
+ // Note ids
+ private int encodingInfoNote;
+ private int sessionEncodingNote;
+
public DecodeInterceptor() {
}
@@ -86,26 +94,37 @@
public void setDefaultEncoding( String s ) {
defaultEncoding=s;
}
+
+ public void setUseSessionEncoding( boolean b ) {
+ useSessionEncoding=b;
+ }
+
+ public void setCharsetAttribute( String s ) {
+ charsetAttribute=s;
+ charsetURIAttribute=";" + charsetAttribute + "=";
+ }
/* -------------------- Initialization -------------------- */
public void engineInit( ContextManager cm )
throws TomcatException
{
- encodingInfo=cm.getNoteId(ContextManager.REQUEST_NOTE,
+ encodingInfoNote=cm.getNoteId(ContextManager.REQUEST_NOTE,
"req.encoding" );
+ sessionEncodingNote=cm.getNoteId(ContextManager.SESSION_NOTE,
+ "session.encoding" );
}
/* -------------------- Request mapping -------------------- */
public int postReadRequest( Request req ) {
MessageBytes pathMB = req.requestURI();
// copy the request
-
+
if( pathMB.isNull())
throw new RuntimeException("ASSERT: null path in request URI");
- // if( path.indexOf("?") >=0 )
- // throw new RuntimeException("ASSERT: ? in requestURI");
+ //if( path.indexOf("?") >=0 )
+ // throw new RuntimeException("ASSERT: ? in requestURI");
// Set the char encoding first
String charEncoding=null;
@@ -121,18 +140,44 @@
charEncoding + " " + contentTypeString );
}
- if( debug > 11 ) dumpHeaders(headers);
+ if( debug > 99 ) dumpHeaders(headers);
// No explicit encoding - try to guess it from Accept-Language
//MessageBytes acceptC= headers.getValue( "Accept-Charset" );
// No explicit encoding - try to guess it from Accept-Language
// MessageBytes acceptL= headers.getValue( "Accept-Language" );
+
+ // Special trick: ;charset= attribute ( similar with sessionId )
+ // That's perfect for multibyte chars in URLs
+ if(charEncoding==null && charsetURIAttribute != null ) {
+ int idxCharset=req.requestURI().indexOf( charsetURIAttribute );
+ if( idxCharset >= 0 ) {
+ String uri=req.requestURI().toString();
+ int nextAtt=uri.indexOf( ';', idxCharset + 1 );
+ String next=null;
+ if( nextAtt > 0 ) {
+ next=uri.substring( nextAtt );
+ charEncoding=
+ uri.substring(idxCharset+
+ charsetURIAttribute.length(),nextAtt);
+ req.requestURI().
+ setString(uri.substring(0, idxCharset) + next);
+ } else {
+ charEncoding=uri.substring(idxCharset+
+ charsetURIAttribute.length());
+ req.requestURI().
+ setString(uri.substring(0, idxCharset));
+ }
+
+ if( debug > 0 )
+ log("ReqAtt= " + charEncoding + " " +
+ req.requestURI() );
+ }
+ }
- // Try per context default
-
- // Global Default
+ // Global Default
if( charEncoding==null ) {
if( debug > 0 ) log( "Default encoding " + defaultEncoding );
if( defaultEncoding != null )
@@ -160,18 +205,56 @@
return 0;
}
+ /** Hook - before the response is sent, get the response encoding
+ * and save it per session ( if we are in a session ). All browsers
+ * I know will use the same encoding in the next request.
+ * Since this is not part of the spec, it's disabled by default.
+ *
+ */
+ public int beforeBody( Request req, Response res ) {
+ if( useSessionEncoding ) {
+ ServerSession sess=req.getSession( false );
+ if( sess!=null ) {
+ String charset=res.getCharacterEncoding();
+ if( charset!=null ) {
+ sess.setNote( sessionEncodingNote, charset );
+ if( debug > 0 )
+ log( "Setting per session encoding " + charset);
+ }
+ }
+ }
+ return DECLINED;
+ }
+
+
public Object getInfo( Context ctx, Request req, int info, String k ) {
// Try to get the encoding info ( this is called later )
- if( info == encodingInfo ) {
+ if( info == encodingInfoNote ) {
// Second attempt to guess the encoding, the request is processed
-
+ String charset=null;
// Use request attributes
-
+ if( charset==null && charsetAttribute != null ) {
+ charset=(String)req.getAttribute( charsetAttribute );
+ if( debug>0 && charset != null )
+ log( "Charset from attribute " + charsetAttribute + " "
+ + charset );
+ }
// Use session attributes
-
+ if( charset==null && useSessionEncoding ) {
+ ServerSession sess=req.getSession( false );
+ if( sess!=null ) {
+ charset=(String)sess.getNote( sessionEncodingNote );
+ if( debug>0 && charset!=null )
+ log("Charset from session " + charset );
+ }
+ }
+ // Per context default
+
+ if( charset != null ) return charset;
+
log( "Default getInfo UTF-8" );
// Use per context default
return "UTF-8";