Author: veithen Date: Mon Sep 27 22:03:00 2010 New Revision: 1001925 URL: http://svn.apache.org/viewvc?rev=1001925&view=rev Log: Moved the character encoding autodetection code (Appendix F.1 of the XML specs) into a separate class so that we can reuse it for other broken StAX implementations.
Added: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java (with props) Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java?rev=1001925&r1=1001924&r2=1001925&view=diff ============================================================================== --- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java (original) +++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java Mon Sep 27 22:03:00 2010 @@ -19,9 +19,7 @@ package org.apache.axiom.util.stax.dialect; -import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import java.io.Reader; import javax.xml.stream.XMLInputFactory; @@ -50,62 +48,9 @@ class BEAInputFactoryWrapper extends XML // information is already available from the stream reader, so that we don't need to // reimplement this part. // TODO: this needs some more unit testing! 
- byte[] startBytes = new byte[4]; - try { - boolean useMark = stream.markSupported(); - if (useMark) { - stream.mark(4); - } else { - stream = new PushbackInputStream(stream, 4); - } - int read = 0; - do { - int c = stream.read(startBytes, read, 4-read); - if (c == -1) { - throw new XMLStreamException("Unexpected end of stream"); - } - read += c; - } while (read < 4); - if (useMark) { - stream.reset(); - } else { - ((PushbackInputStream)stream).unread(startBytes); - } - } catch (IOException ex) { - throw new XMLStreamException("Unable to read start bytes", ex); - } - int marker = ((startBytes[0] & 0xFF) << 24) + ((startBytes[1] & 0xFF) << 16) - + ((startBytes[2] & 0xFF) << 8) + (startBytes[3] & 0xFF); - String encoding; - switch (marker) { - case 0x0000FEFF: - case 0xFFFE0000: - case 0x0000FFFE: - case 0xFEFF0000: - case 0x0000003C: - case 0x3C000000: - case 0x00003C00: - case 0x003C0000: - encoding = "UCS-4"; - break; - case 0x003C003F: - encoding = "UTF-16BE"; - break; - case 0x3C003F00: - encoding = "UTF-16LE"; - break; - case 0x3C3F786D: - encoding = "UTF-8"; - break; - default: - if ((marker & 0xFFFF0000) == 0xFEFF0000) { - encoding = "UTF-16BE"; - } else if ((marker & 0xFFFF0000) == 0xFFFE0000) { - encoding = "UTF-16LE"; - } else { - encoding = "UTF-8"; - } - } + EncodingDetectionHelper helper = new EncodingDetectionHelper(stream); + stream = helper.getInputStream(); + String encoding = helper.detectEncoding(); XMLStreamReader reader; if (systemId == null) { reader = super.createXMLStreamReader(stream); Added: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java?rev=1001925&view=auto ============================================================================== --- 
webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java (added) +++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java Mon Sep 27 22:03:00 2010 @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.axiom.util.stax.dialect; + +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; + +import javax.xml.stream.XMLStreamException; + +/** + * Implements the character encoding autodetection algorithm described in Appendix F.1 of the + * XML 1.0 specifications (Fifth Edition). 
+ */ +class EncodingDetectionHelper { + private final InputStream stream; + private final boolean useMark; + + public EncodingDetectionHelper(InputStream stream) { + useMark = stream.markSupported(); + if (useMark) { + this.stream = stream; + } else { + this.stream = new PushbackInputStream(stream, 4); + } + } + + public InputStream getInputStream() { + return stream; + } + + public String detectEncoding() throws XMLStreamException { + byte[] startBytes = new byte[4]; + try { + if (useMark) { + stream.mark(4); + } + int read = 0; + do { + int c = stream.read(startBytes, read, 4-read); + if (c == -1) { + throw new XMLStreamException("Unexpected end of stream"); + } + read += c; + } while (read < 4); + if (useMark) { + stream.reset(); + } else { + ((PushbackInputStream)stream).unread(startBytes); + } + } catch (IOException ex) { + throw new XMLStreamException("Unable to read start bytes", ex); + } + int marker = ((startBytes[0] & 0xFF) << 24) + ((startBytes[1] & 0xFF) << 16) + + ((startBytes[2] & 0xFF) << 8) + (startBytes[3] & 0xFF); + switch (marker) { + case 0x0000FEFF: + case 0xFFFE0000: + case 0x0000FFFE: + case 0xFEFF0000: + case 0x0000003C: + case 0x3C000000: + case 0x00003C00: + case 0x003C0000: + return "UCS-4"; + case 0x003C003F: + return "UTF-16BE"; + case 0x3C003F00: + return "UTF-16LE"; + case 0x3C3F786D: + return "UTF-8"; + default: + if ((marker & 0xFFFF0000) == 0xFEFF0000) { + return "UTF-16BE"; + } else if ((marker & 0xFFFF0000) == 0xFFFE0000) { + return "UTF-16LE"; + } else { + return "UTF-8"; + } + } + } +} Propchange: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java ------------------------------------------------------------------------------ svn:eol-style = native