Author: markt Date: Sun Mar 3 23:07:28 2013 New Revision: 1452152 URL: http://svn.apache.org/r1452152 Log: Start of a new framework for UTF-8 testing. I have discovered that the correct replacement behaviour (that we want to use for URIs) has many, many edge cases. This class should make it easier to construct additional tests.
Added: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java (with props) Added: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java URL: http://svn.apache.org/viewvc/tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java?rev=1452152&view=auto ============================================================================== --- tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java (added) +++ tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java Sun Mar 3 23:07:28 2013 @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tomcat.util.buf; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestUtf8Extended { + + private List<Utf8TestCase> testCases = new ArrayList<>(); + + @Before + public void setup() { + testCases.add(new Utf8TestCase( + "Zero length input", + new int[] {}, + -1, + "")); + testCases.add(new Utf8TestCase( + "Valid one byte sequence", + new int[] {0x41}, + -1, + "A")); + testCases.add(new Utf8TestCase( + "Valid two byte sequence", + new int[] {0xC2, 0xA9}, + -1, + "\u00A9")); + testCases.add(new Utf8TestCase( + "Valid three byte sequence", + new int[] {0xE0, 0xA4, 0x87}, + -1, + "\u0907")); + testCases.add(new Utf8TestCase( + "Valid four byte sequence", + new int[] {0xF0, 0x90, 0x90, 0x80}, + -1, + "\uD801\uDC00")); + } + + @Test + public void testHarmonyDecoder() { + doTest(new Utf8Decoder()); + } + + + @Test + public void testJvmDecoder() { + doTest(Charset.forName("UTF-8").newDecoder()); + } + + + private void doTest(CharsetDecoder decoder) { + for (Utf8TestCase testCase : testCases) { + // Configure decoder to fail on an error + decoder.reset(); + decoder.onMalformedInput(CodingErrorAction.REPORT); + decoder.onUnmappableCharacter(CodingErrorAction.REPORT); + + // Add each byte one at a time. The decoder should fail as soon as + // an invalid sequence has been provided + int len = testCase.input.length; + ByteBuffer bb = ByteBuffer.allocate(len); + CharBuffer cb = CharBuffer.allocate(len); + for (int i = 0; i < len; i++) { + bb.put((byte) testCase.input[i]); + bb.flip(); + CoderResult cr = decoder.decode(bb, cb, false); + if (cr.isError()) { + Assert.assertEquals(testCase.description, + testCase.invalidIndex, i); + break; + } + bb.compact(); + } + + // Configure decoder to replace on an error + decoder.reset(); + decoder.onMalformedInput(CodingErrorAction.REPLACE); + decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + + // Add each byte one at a time. + bb.clear(); + cb.clear(); + for (int i = 0; i < len; i++) { + bb.put((byte) testCase.input[i]); + bb.flip(); + CoderResult cr = decoder.decode(bb, cb, false); + if (cr.isError()) { + Assert.fail(testCase.description); + } + bb.compact(); + } + // For incomplete sequences at the end of the input need to tell + // the decoder the input has ended + bb.flip(); + CoderResult cr = decoder.decode(bb, cb, true); + if (cr.isError()) { + Assert.fail(testCase.description); + } + cb.flip(); + Assert.assertEquals(testCase.description, testCase.outputReplaced, + cb.toString()); + } + } + + + /** + * Encapsulates a single UTF-8 test case + */ + private static class Utf8TestCase { + private final String description; + private final int[] input; + private final int invalidIndex; + private final String outputReplaced; + + public Utf8TestCase(String description, int[] input, int invalidIndex, + String outputReplaced) { + this.description = description; + this.input = input; + this.invalidIndex = invalidIndex; + this.outputReplaced = outputReplaced; + } + } +} Propchange: tomcat/trunk/test/org/apache/tomcat/util/buf/TestUtf8Extended.java ------------------------------------------------------------------------------ svn:eol-style = native --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org