Author: veithen
Date: Wed Sep 29 09:14:26 2010
New Revision: 1002531

URL: http://svn.apache.org/viewvc?rev=1002531&view=rev
Log:
Work around broken character encoding autodetection in XLXP2.

Added: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java (with props) Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java?rev=1002531&r1=1002530&r2=1002531&view=diff ============================================================================== --- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java (original) +++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java Wed Sep 29 09:14:26 2010 @@ -63,7 +63,7 @@ class XLXP2Dialect extends AbstractStAXD } public XMLInputFactory normalize(XMLInputFactory factory) { - return new NormalizingXMLInputFactoryWrapper(factory, this); + return new XLXP2InputFactoryWrapper(factory, this); } public XMLOutputFactory normalize(XMLOutputFactory factory) { Added: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java?rev=1002531&view=auto ============================================================================== --- 
webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java (added) +++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java Wed Sep 29 09:14:26 2010 @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.axiom.util.stax.dialect; + +import java.io.InputStream; + +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +class XLXP2InputFactoryWrapper extends NormalizingXMLInputFactoryWrapper { + public XLXP2InputFactoryWrapper(XMLInputFactory parent, AbstractStAXDialect dialect) { + super(parent, dialect); + } + + public XMLStreamReader createXMLStreamReader(InputStream stream) throws XMLStreamException { + return createXMLStreamReader(null, stream); + } + + public XMLStreamReader createXMLStreamReader(String systemId, InputStream stream) + throws XMLStreamException { + // XLXP2 fails on documents that use UTF-16 without byte order marker, + // although this type of document is explicitly supported by the XML + // specification. 
+ EncodingDetectionHelper helper = new EncodingDetectionHelper(stream); + stream = helper.getInputStream(); + String encoding = helper.detectEncoding(); + if (encoding.startsWith("UTF-16")) { + if (systemId == null) { + return super.createXMLStreamReader(stream, encoding); + } else { + // Here we have an issue because it is not possible to specify the + // systemId and the encoding at the same time... + return super.createXMLStreamReader(systemId, stream); + } + } else { + if (systemId == null) { + return super.createXMLStreamReader(stream); + } else { + return super.createXMLStreamReader(systemId, stream); + } + } + } +} Propchange: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java?rev=1002531&r1=1002530&r2=1002531&view=diff ============================================================================== --- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java (original) +++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java Wed Sep 29 09:14:26 2010 @@ -47,8 +47,17 @@ public class DialectTest extends TestSui addDialectTest(new GetCharacterEncodingSchemeTestCase()); addDialectTest(new GetEncodingExternalTestCase()); addDialectTest(new GetEncodingFromDetectionTestCase("UTF-8", "UTF-8")); - addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeBig", "UTF-16BE")); - addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeLittle", "UTF-16LE")); + 
// The case of UTF-16 with a byte order marker is not well defined: + // * One may argue that the result should be UTF-16BE or UTF-16LE because + // otherwise the information about the byte order is lost. + // * On the other hand, one may argue that the result should be UTF-16 + // because UTF-16BE or UTF-16LE may be interpreted as an indication that + // there should be no BOM. + // Therefore we accept both results. + addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeBig", new String[] { "UTF-16", "UTF-16BE" } )); + addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeLittle", new String[] { "UTF-16", "UTF-16LE" })); + // Here there is no doubt; if the encoding is UTF-16 without BOM, then the + // parser should report the detected byte order. addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeBigUnmarked", "UTF-16BE")); addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeLittleUnmarked", "UTF-16LE")); addDialectTest(new GetEncodingTestCase()); Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java?rev=1002531&r1=1002530&r2=1002531&view=diff ============================================================================== --- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java (original) +++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java Wed Sep 29 09:14:26 2010 @@ -19,6 +19,9 @@ package org.apache.axiom.util.stax.dialect; import java.io.ByteArrayInputStream; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; import javax.xml.stream.XMLInputFactory; 
import javax.xml.stream.XMLStreamReader; @@ -31,18 +34,24 @@ import javax.xml.stream.XMLStreamReader; */ public class GetEncodingFromDetectionTestCase extends DialectTestCase { private final String javaEncoding; - private final String xmlEncoding; + private final Set xmlEncodings; - public GetEncodingFromDetectionTestCase(String javaEncoding, String xmlEncoding) { + public GetEncodingFromDetectionTestCase(String javaEncoding, String[] xmlEncodings) { this.javaEncoding = javaEncoding; - this.xmlEncoding = xmlEncoding; + this.xmlEncodings = new HashSet(Arrays.asList(xmlEncodings)); setName(getClass().getName() + " [" + javaEncoding + "]"); } + + public GetEncodingFromDetectionTestCase(String javaEncoding, String xmlEncoding) { + this(javaEncoding, new String[] { xmlEncoding }); + } protected void runTest() throws Throwable { XMLInputFactory factory = newNormalizedXMLInputFactory(); XMLStreamReader reader = factory.createXMLStreamReader(new ByteArrayInputStream( "<?xml version=\"1.0\"?><root/>".getBytes(javaEncoding))); - assertEquals(xmlEncoding, reader.getEncoding()); + String actualEncoding = reader.getEncoding(); + assertTrue("Expected one of " + xmlEncodings + ", but got " + actualEncoding, + xmlEncodings.contains(actualEncoding)); } }