Author: markt
Date: Thu Dec 15 21:34:06 2016
New Revision: 1774526
URL: http://svn.apache.org/viewvc?rev=1774526&view=rev
Log:
Add a new encoding detector implementation.
The BoM encoding detection is based on the previous code.
The prolog encoding detection delegates to the JRE's XML parser rather than
the custom Jasper parser.
Added:
tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java (with
props)
Modified:
tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
Added: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java?rev=1774526&view=auto
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java (added)
+++ tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java Thu Dec
15 21:34:06 2016
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jasper.compiler;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+/*
+ * The BoM detection is derived from:
+ *
+ * http://svn.us.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248
+ */
+/**
+ * Inspects the start of an input stream to determine its character encoding.
+ * Two independent checks are made: the leading bytes are compared against
+ * known byte order marks (BoM) and byte signatures, and the encoding declared
+ * in an XML prolog (if any) is read using the JRE's StAX parser.
+ */
+class EncodingDetector {
+
+    private static final XMLInputFactory XML_INPUT_FACTORY;
+    static {
+        XML_INPUT_FACTORY = XMLInputFactory.newFactory();
+    }
+
+    private final BomResult bomResult;
+    private final String prologEncoding;
+
+
+    /*
+     * TODO: Refactor Jasper InputStream creation and handling so the
+     *       InputStream passed to this method is buffered and therefore saves
+     *       on multiple opening and re-opening of the same file.
+     */
+    /**
+     * Runs both detection steps against the supplied stream. The stream is
+     * consumed by this constructor and is not closed by it.
+     *
+     * @param is the stream to inspect, positioned at the first byte
+     *
+     * @throws IOException if the buffered stream cannot be reset or the
+     *                     detected BoM cannot be skipped
+     */
+    EncodingDetector(InputStream is) throws IOException {
+        // Keep buffer size to a minimum here. BoM will be no more than 4 bytes
+        // so that is the maximum we need to buffer
+        BufferedInputStream bis = new BufferedInputStream(is, 4);
+        bis.mark(4);
+
+        bomResult = processBom(bis);
+
+        // Reset the stream back to the start to allow the XML prolog detection
+        // to work. Skip any BoM we discovered.
+        bis.reset();
+        // The bytes must be skipped on the buffered stream. After the reset
+        // the BoM is held in the buffer, so reading from the underlying stream
+        // would leave the BoM in place and discard later content instead.
+        // processBom() never returns null so no null check is required.
+        // NOTE(review): the BoM-less signatures in parseBom() report a skip of
+        // 4, which covers real markup bytes - confirm that is intended.
+        for (int i = 0; i < bomResult.skip; i++) {
+            bis.read();
+        }
+
+        prologEncoding = getPrologEncoding(bis);
+    }
+
+
+    /**
+     * @return the encoding name derived from the leading bytes of the stream;
+     *         "UTF-8" when nothing more specific was recognised
+     */
+    String getBomEncoding() {
+        return bomResult.encoding;
+    }
+
+
+    /**
+     * @return {@code TRUE} for big-endian, {@code FALSE} for little-endian or
+     *         {@code null} when no byte order was determined
+     */
+    Boolean getBigEndian() {
+        return bomResult.bigEndian;
+    }
+
+
+    /**
+     * @return the number of leading bytes associated with the detected BoM or
+     *         signature; zero when the default encoding was used
+     */
+    int getSkip() {
+        return bomResult.skip;
+    }
+
+
+    /**
+     * @return the encoding reported by the XML parser for the prolog, or
+     *         {@code null} if the parser reported none or could not be created
+     */
+    String getPrologEncoding() {
+        return prologEncoding;
+    }
+
+
+    /*
+     * Asks the JRE's StAX parser for the encoding declared in the XML prolog.
+     * A parser failure is treated as "no declared encoding" and null is
+     * returned.
+     */
+    private String getPrologEncoding(InputStream stream) {
+        String encoding = null;
+        XMLStreamReader xmlStreamReader = null;
+        try {
+            xmlStreamReader = XML_INPUT_FACTORY.createXMLStreamReader(stream);
+            encoding = xmlStreamReader.getCharacterEncodingScheme();
+        } catch (XMLStreamException e) {
+            // Ignore. The content may not be XML at all.
+        } finally {
+            // Release parser resources. This does not close the stream.
+            if (xmlStreamReader != null) {
+                try {
+                    xmlStreamReader.close();
+                } catch (XMLStreamException e) {
+                    // Ignore. Nothing useful can be done at this point.
+                }
+            }
+        }
+        return encoding;
+    }
+
+
+    /*
+     * Reads up to four bytes from the stream and maps them to an encoding.
+     * Falls back to UTF-8 with nothing to skip if the stream cannot be read.
+     */
+    private BomResult processBom(InputStream stream) {
+        // Read first four bytes (or as many are available) and determine
+        // encoding
+        try {
+            final byte[] b4 = new byte[4];
+            int count = 0;
+            int singleByteRead;
+            while (count < 4) {
+                singleByteRead = stream.read();
+                if (singleByteRead == -1) {
+                    break;
+                }
+                b4[count] = (byte) singleByteRead;
+                count++;
+            }
+
+            return parseBom(b4, count);
+        } catch (IOException ioe) {
+            // Failed.
+            return new BomResult("UTF-8", null, 0);
+        }
+    }
+
+
+    /*
+     * Maps the first 'count' bytes of b4 to an encoding, a byte order and the
+     * number of bytes to skip. Only the first 'count' entries are examined.
+     */
+    private BomResult parseBom(byte[] b4, int count) {
+
+        if (count < 2) {
+            return new BomResult("UTF-8", null, 0);
+        }
+
+        // UTF-16, with BOM
+        int b0 = b4[0] & 0xFF;
+        int b1 = b4[1] & 0xFF;
+        if (b0 == 0xFE && b1 == 0xFF) {
+            // UTF-16, big-endian
+            return new BomResult("UTF-16BE", Boolean.TRUE, 2);
+        }
+        if (b0 == 0xFF && b1 == 0xFE) {
+            // UTF-16, little-endian
+            return new BomResult("UTF-16LE", Boolean.FALSE, 2);
+        }
+
+        // default to UTF-8 if we don't have enough bytes to make a
+        // good determination of the encoding
+        if (count < 3) {
+            return new BomResult("UTF-8", null, 0);
+        }
+
+        // UTF-8 with a BOM
+        int b2 = b4[2] & 0xFF;
+        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
+            return new BomResult("UTF-8", null, 3);
+        }
+
+        // default to UTF-8 if we don't have enough bytes to make a
+        // good determination of the encoding
+        if (count < 4) {
+            return new BomResult("UTF-8", null, 0);
+        }
+
+        // other encodings
+        int b3 = b4[3] & 0xFF;
+        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
+            // UCS-4, big endian (1234)
+            return new BomResult("ISO-10646-UCS-4", Boolean.TRUE, 4);
+        }
+        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
+            // UCS-4, little endian (4321)
+            return new BomResult("ISO-10646-UCS-4", Boolean.FALSE, 4);
+        }
+        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
+            // UCS-4, unusual octet order (2143)
+            // REVISIT: What should this be?
+            return new BomResult("ISO-10646-UCS-4", null, 4);
+        }
+        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
+            // UCS-4, unusual octet order (3412)
+            // REVISIT: What should this be?
+            return new BomResult("ISO-10646-UCS-4", null, 4);
+        }
+        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
+            // UTF-16, big-endian, no BOM
+            // (or could turn out to be UCS-2...
+            // REVISIT: What should this be?
+            return new BomResult("UTF-16BE", Boolean.TRUE, 4);
+        }
+        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
+            // UTF-16, little-endian, no BOM
+            // (or could turn out to be UCS-2...
+            return new BomResult("UTF-16LE", Boolean.FALSE, 4);
+        }
+        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
+            // EBCDIC
+            // a la xerces1, return CP037 instead of EBCDIC here
+            return new BomResult("CP037", null, 4);
+        }
+
+        // default encoding
+        return new BomResult("UTF-8", null, 0);
+    }
+
+
+    /*
+     * Immutable holder for the outcome of the leading-byte inspection.
+     */
+    private static class BomResult {
+
+        // Name of the detected (or default) encoding
+        public final String encoding;
+        // TRUE = big-endian, FALSE = little-endian, null = not determined
+        public final Boolean bigEndian;
+        // Number of leading bytes associated with the detection
+        public final int skip;
+
+        public BomResult(String encoding, Boolean bigEndian, int skip) {
+            this.encoding = encoding;
+            this.bigEndian = bigEndian;
+            this.skip = skip;
+        }
+    }
+}
Propchange: tomcat/trunk/java/org/apache/jasper/compiler/EncodingDetector.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java?rev=1774526&r1=1774525&r2=1774526&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java
(original)
+++ tomcat/trunk/java/org/apache/jasper/security/SecurityClassLoad.java Thu Dec
15 21:34:06 2016
@@ -39,6 +39,10 @@ public final class SecurityClassLoad {
final String basePackage = "org.apache.jasper.";
try {
+ // Ensure XMLInputFactory is loaded with Tomcat's class loader
+ loader.loadClass( basePackage +
+ "compiler.EncodingDetector");
+
loader.loadClass( basePackage +
"runtime.JspFactoryImpl$PrivilegedGetPageContext");
loader.loadClass( basePackage +
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]