Author: sylvain
Date: Sat Mar 12 05:04:38 2005
New Revision: 157253
URL: http://svn.apache.org/viewcvs?view=rev&rev=157253
Log: Add support for flowscript file-specific encoding using a first-line comment
Modified:
    cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java
    cocoon/trunk/status.xml

Modified: cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java
URL: http://svn.apache.org/viewcvs/cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java?view=diff&r1=157252&r2=157253
==============================================================================
--- cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java (original)
+++ cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java Sat Mar 12 05:04:38 2005
@@ -40,6 +40,9 @@
 import org.apache.commons.jxpath.JXPathIntrospector;
 import org.apache.commons.jxpath.ri.JXPathContextReferenceImpl;
 import org.apache.excalibur.source.Source;
+import org.apache.regexp.RE;
+import org.apache.regexp.RECompiler;
+import org.apache.regexp.REProgram;
 import org.mozilla.javascript.Context;
 import org.mozilla.javascript.EcmaError;
 import org.mozilla.javascript.EvaluatorException;
@@ -62,9 +65,11 @@
 import java.awt.Dimension;
 import java.awt.Toolkit;
 import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
+import java.io.PushbackInputStream;
 import java.io.Reader;
 import java.util.ArrayList;
 import java.util.List;
@@ -499,14 +504,42 @@
     protected Script compileScript(Context cx, Scriptable scope, Source src) throws Exception {
-        InputStream is = src.getInputStream();
+        PushbackInputStream is = new PushbackInputStream(src.getInputStream(), ENCODING_BUF_SIZE);
         try {
-            Reader reader = new BufferedReader(new InputStreamReader(is));
+            String encoding = findEncoding(is);
+            Reader reader = encoding == null ? new InputStreamReader(is) : new InputStreamReader(is, encoding);
+            reader = new BufferedReader(reader);
             Script compiledScript = cx.compileReader(scope, reader, src.getURI(), 1, null);
             return compiledScript;
         } finally {
             is.close();
+        }
+    }
+
+    // A charset name can be up to 40 characters taken from the printable characters of US-ASCII
+    // (see http://www.iana.org/assignments/character-sets). So reading 100 bytes should be more than enough.
+    private final static int ENCODING_BUF_SIZE = 100;
+    // Match 'encoding = xxxx' on the first line
+    REProgram encodingRE = new RECompiler().compile("^.*encoding\\s*=\\s*([^\\s]*)");
+
+    /**
+     * Find the encoding of the stream, or null if not specified
+     */
+    String findEncoding(PushbackInputStream is) throws IOException {
+        // Read some bytes
+        byte[] buffer = new byte[ENCODING_BUF_SIZE];
+        int len = is.read(buffer, 0, buffer.length);
+        // and push them back
+        is.unread(buffer, 0, len);
+
+        // Interpret them as an ASCII string
+        String str = new String(buffer, 0, len, "ASCII");
+        RE re = new RE(encodingRE);
+        if (re.match(str)) {
+            return re.getParen(1);
+        } else {
+            return null;
         }
     }

Modified: cocoon/trunk/status.xml
URL: http://svn.apache.org/viewcvs/cocoon/trunk/status.xml?view=diff&r1=157252&r2=157253
==============================================================================
--- cocoon/trunk/status.xml (original)
+++ cocoon/trunk/status.xml Sat Mar 12 05:04:38 2005
@@ -429,6 +429,11 @@
     </action>
   </release>
   <release version="2.1.7" date="TBD">
+    <action dev="SW" type="add">
+      A flowscript's file encoding can now be specified by a comment on the script's
+      very first line, in the form "<code>// encoding = xxxx</code>". Otherwise, the
+      default platform encoding is used.
+    </action>
     <action dev="BD" type="fix">
       Tour block: fix "shapes" sample using if/else in flowscript instead of switch.