Author: sylvain
Date: Sat Mar 12 05:04:38 2005
New Revision: 157253

URL: http://svn.apache.org/viewcvs?view=rev&rev=157253
Log:
Add support for flowscript file-specific encoding using a first-line comment

Modified:
    cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java
    cocoon/trunk/status.xml

Modified: cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java
URL: http://svn.apache.org/viewcvs/cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java?view=diff&r1=157252&r2=157253
==============================================================================
--- cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java (original)
+++ cocoon/trunk/src/java/org/apache/cocoon/components/flow/javascript/fom/FOM_JavaScriptInterpreter.java Sat Mar 12 05:04:38 2005
@@ -40,6 +40,9 @@
 import org.apache.commons.jxpath.JXPathIntrospector;
 import org.apache.commons.jxpath.ri.JXPathContextReferenceImpl;
 import org.apache.excalibur.source.Source;
+import org.apache.regexp.RE;
+import org.apache.regexp.RECompiler;
+import org.apache.regexp.REProgram;
 import org.mozilla.javascript.Context;
 import org.mozilla.javascript.EcmaError;
 import org.mozilla.javascript.EvaluatorException;
@@ -62,9 +65,11 @@
 import java.awt.Dimension;
 import java.awt.Toolkit;
 import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
+import java.io.PushbackInputStream;
 import java.io.Reader;
 import java.util.ArrayList;
 import java.util.List;
@@ -499,14 +504,42 @@
 
     protected Script compileScript(Context cx, Scriptable scope, Source src)
     throws Exception {
-        InputStream is = src.getInputStream();
+        PushbackInputStream is = new PushbackInputStream(src.getInputStream(), ENCODING_BUF_SIZE);
         try {
-            Reader reader = new BufferedReader(new InputStreamReader(is));
+            String encoding = findEncoding(is);
+            Reader reader = encoding == null ? new InputStreamReader(is) : new InputStreamReader(is, encoding);
+            reader = new BufferedReader(reader);
             Script compiledScript = cx.compileReader(scope, reader,
                     src.getURI(), 1, null);
             return compiledScript;
         } finally {
             is.close();
+        }
+    }
+    
+    // A charset name can be up to 40 characters taken from the printable characters of US-ASCII
+    // (see http://www.iana.org/assignments/character-sets). So reading 100 bytes should be more than enough.
+    private final static int ENCODING_BUF_SIZE = 100;
+    // Match 'encoding = xxxx' on the first line
+    REProgram encodingRE = new RECompiler().compile("^.*encoding\\s*=\\s*([^\\s]*)");
+    
+    /**
+     * Find the encoding of the stream, or null if not specified
+     */
+    String findEncoding(PushbackInputStream is) throws IOException {
+        // Read some bytes
+        byte[] buffer = new byte[ENCODING_BUF_SIZE];
+        int len = is.read(buffer, 0, buffer.length);
+        // and push them back
+        is.unread(buffer, 0, len);
+        
+        // Interpret them as an ASCII string
+        String str = new String(buffer, 0, len, "ASCII");
+        RE re = new RE(encodingRE);
+        if (re.match(str)) {
+            return re.getParen(1);
+        } else {
+            return null;
         }
     }
 

Modified: cocoon/trunk/status.xml
URL: http://svn.apache.org/viewcvs/cocoon/trunk/status.xml?view=diff&r1=157252&r2=157253
==============================================================================
--- cocoon/trunk/status.xml (original)
+++ cocoon/trunk/status.xml Sat Mar 12 05:04:38 2005
@@ -429,6 +429,11 @@
    </action>
  </release>
  <release version="2.1.7" date="TBD">
+   <action dev="SW" type="add">
+     A flowscript's file encoding can now be specified by a comment on the script's
+     very first line, in the form "<code>// encoding = xxxx</code>". Otherwise, the
+     default platform encoding is used.
+   </action>
    <action dev="BD" type="fix">
      Tour block: fix "shapes" sample using if/else in flowscript
      instead of switch.


Reply via email to