Author: schor Date: Wed Mar 23 21:47:07 2011 New Revision: 1084766 URL: http://svn.apache.org/viewvc?rev=1084766&view=rev Log: [UIMA-2099]
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/pear/util/XMLUtil.java Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/pear/util/XMLUtil.java URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/pear/util/XMLUtil.java?rev=1084766&r1=1084765&r2=1084766&view=diff ============================================================================== --- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/pear/util/XMLUtil.java (original) +++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/pear/util/XMLUtil.java Wed Mar 23 21:47:07 2011 @@ -158,8 +158,9 @@ public class XMLUtil { BufferedReader fReader = null; try { // first, make sure - this is a valid XML file - if (!isValidXmlFile(xmlFile)) + if (!isValidXmlFile(xmlFile)) { return null; + } iStream = new FileInputStream(xmlFile); // read prefix - possible BOM or signature int byteCounter = 0; @@ -226,7 +227,12 @@ public class XMLUtil { } if (offset != (bytes2put - 1)) throw new IOException("cannot read file"); - // check first XML header characters - '<?xml'
+ // check first XML header characters - '<?' + // buffer is 7 bytes + // some Javas won't properly decode an odd number of bytes for utf16 coding + // https://issues.apache.org/jira/browse/UIMA-2099 + byte[] buffer6 = new byte[6]; + System.arraycopy(buffer, 0, buffer6, 0, 6); if (utf8Signature) { // check for UTF-8 String test = new String(buffer, "UTF-8"); @@ -234,7 +240,7 @@ public class XMLUtil { encoding = "UTF-8"; } else if (utf16Signature) { // check for UTF-16 - String test = new String(buffer, "UTF-16"); + String test = new String(buffer6, "UTF-16"); if (test.startsWith(FIRST_XML_CHARS)) encoding = "UTF-16"; } else if (utf32Signature) { @@ -246,12 +252,12 @@ public class XMLUtil { encoding = "UTF-8"; else { // next, check for UTF-16LE in XML header characters - test = new String(buffer, "UTF-16LE"); + test = new String(buffer6, "UTF-16LE"); if (test.startsWith(FIRST_XML_CHARS)) { encoding = "UTF-16LE"; } else { - // next, check for UTF-16BE in XML header hcharacters - test = new String(buffer, "UTF-16BE"); + // next, check for UTF-16BE in XML header characters + test = new String(buffer6, "UTF-16BE"); if (test.startsWith(FIRST_XML_CHARS)) { encoding = "UTF-16BE"; }